Add PMU boot support. The PMU firmware is loaded into the PMU falcon. The RM/kernel driver receives an INIT ack (through the interrupt mechanism) from the PMU when the PMU boots successfully. Signed-off-by: Deepak Goyal <dgoyal at nvidia.com> --- drm/nouveau/include/nvkm/subdev/pmu.h | 26 +- drm/nouveau/nvkm/subdev/pmu/base.c | 108 ++ drm/nouveau/nvkm/subdev/pmu/gk20a.c | 2131 ++++++++++++++++++++++++++++++++- drm/nouveau/nvkm/subdev/pmu/gk20a.h | 369 ++++++ drm/nouveau/nvkm/subdev/pmu/priv.h | 264 ++++ 5 files changed, 2884 insertions(+), 14 deletions(-) create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h index 7b86acc634a0..659b4e0ba02b 100644 --- a/drm/nouveau/include/nvkm/subdev/pmu.h +++ b/drm/nouveau/include/nvkm/subdev/pmu.h @@ -1,7 +1,20 @@ #ifndef __NVKM_PMU_H__ #define __NVKM_PMU_H__ #include <core/subdev.h> +#include <core/device.h> +#include <subdev/mmu.h> +#include <linux/debugfs.h> +struct pmu_buf_desc { + struct nvkm_gpuobj *pmubufobj; + struct nvkm_vma pmubufvma; + size_t size; +}; +struct pmu_priv_vm { + struct nvkm_gpuobj *mem; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; +}; struct nvkm_pmu { struct nvkm_subdev base; @@ -20,9 +33,20 @@ struct nvkm_pmu { u32 message; u32 data[2]; } recv; - + wait_queue_head_t init_wq; + bool gr_initialised; + struct dentry *debugfs; + struct pmu_buf_desc *pg_buf; + struct pmu_priv_vm *pmuvm; int (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); void (*pgob)(struct nvkm_pmu *, bool); + int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token); + int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token); + int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load); + int (*pmu_load_update)(struct nvkm_pmu *pmu); + void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu); + void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles, + u32 *total_cycles); }; static inline struct nvkm_pmu * diff --git 
a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c
index 054b2d2eec35..6afd389b9764 100644
--- a/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -25,6 +25,137 @@

 #include <subdev/timer.h>

+/*
+ * Initialise @allocator to manage the address range
+ * [start, start + len - 1], tracked by a bitmap with one bit per address.
+ * The bitmap is kcalloc()ed here and kfree()d by
+ * nvkm_pmu_allocator_destroy().
+ *
+ * Returns 0 on success, -ENOMEM if the bitmap cannot be allocated.
+ */
+int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
+		const char *name, u32 start, u32 len)
+{
+	memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
+
+	/*
+	 * Copy one byte less than the 32-byte bound: the structure was
+	 * zeroed above, so the name stays NUL-terminated even when @name is
+	 * longer (assumes name[] holds at least 32 bytes -- TODO confirm).
+	 */
+	strncpy(allocator->name, name, 32 - 1);
+
+	allocator->base = start;
+	allocator->limit = start + len - 1;
+
+	allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
+			GFP_KERNEL);
+	if (!allocator->bitmap)
+		return -ENOMEM;
+
+	/* three conversions in the format, so pass name, base and limit */
+	allocator_dbg(allocator, "%s : base %d, limit %d",
+			allocator->name, allocator->base, allocator->limit);
+
+	init_rwsem(&allocator->rw_sema);
+
+	allocator->alloc = nvkm_pmu_allocator_block_alloc;
+	allocator->free = nvkm_pmu_allocator_block_free;
+
+	return 0;
+}
+
+/*
+ * Destroy @allocator: free the bitmap and zero the whole structure.
+ * The write lock is taken first and never released; the memset() wipes
+ * the semaphore together with the rest of the structure.
+ */
+void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
+{
+	down_write(&allocator->rw_sema);
+
+	kfree(allocator->bitmap);
+
+	memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
+}
+
+/*
+ * Allocate one contiguous block of @len addresses, rounded up to @align.
+ *
+ * *addr != 0 requests a fixed address: the call fails unless the block
+ * starting exactly there is free.  *addr == 0 lets the allocator pick,
+ * and the address found is returned to the caller in *addr.
+ *
+ * Returns 0 on success, -EINVAL for a bad range, alignment or length,
+ * -ENOMEM when no suitable free block exists.
+ */
+int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
+		u32 *addr, u32 len, u32 align)
+{
+	unsigned long _addr;
+
+	allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
+
+	if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
+	    *addr + len > allocator->limit || /* check addr range */
+	    *addr & (align - 1) || /* check addr alignment */
+	    len == 0) /* check len */
+		return -EINVAL;
+
+	len = ALIGN(len, align);
+	if (!len)
+		return -ENOMEM;
+
+	down_write(&allocator->rw_sema);
+
+	_addr = bitmap_find_next_zero_area(allocator->bitmap,
+			allocator->limit - allocator->base + 1,
+			*addr ? (*addr - allocator->base) : 0,
+			len,
+			align - 1);
+	if ((_addr > allocator->limit - allocator->base + 1) ||
+	    (*addr && *addr != (_addr + allocator->base))) {
+		up_write(&allocator->rw_sema);
+		return -ENOMEM;
+	}
+
+	bitmap_set(allocator->bitmap, _addr, len);
+	*addr = allocator->base + _addr;
+
+	up_write(&allocator->rw_sema);
+
+	allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
+
+	return 0;
+}
+
+/*
+ * Release @len addresses (rounded up to @align) starting at @addr.
+ * Returns 0 on success, -EINVAL for a bad range, alignment or length.
+ */
+int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
+		u32 addr, u32 len, u32 align)
+{
+	allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
+
+	if (addr + len > allocator->limit || /* check addr range */
+	    addr < allocator->base ||
+	    addr & (align - 1)) /* check addr alignment */
+		return -EINVAL;
+
+	len = ALIGN(len, align);
+	if (!len)
+		return -EINVAL;
+
+	down_write(&allocator->rw_sema);
+	bitmap_clear(allocator->bitmap, addr - allocator->base, len);
+	up_write(&allocator->rw_sema);
+
+	allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
+
+	return 0;
+}
+
 void
 nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 {
diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index a49934bbe637..0fd2530301a3 100644
--- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -20,21 +20,67 @@
* DEALINGS IN THE SOFTWARE. */ #include "priv.h" +#include "gk20a.h" +#include <core/client.h> +#include <core/gpuobj.h> +#include <subdev/bar.h> +#include <subdev/fb.h> +#include <subdev/mc.h> +#include <subdev/timer.h> +#include <subdev/mmu.h> +#include <subdev/pmu.h> +#include <engine/falcon.h> +#include <linux/delay.h> /* for mdelay */ +#include <linux/firmware.h> +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/uaccess.h> #include <subdev/clk.h> #include <subdev/timer.h> #include <subdev/volt.h> #define BUSY_SLOT 0 #define CLK_SLOT 7 +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" + +static int falc_trace_show(struct seq_file *s, void *data); +static int falc_trace_open(struct inode *inode, struct file *file) +{ + return single_open(file, falc_trace_show, inode->i_private); +} +static const struct file_operations falc_trace_fops = { + .open = falc_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +struct pmu_priv_vm pmuvm; +const struct firmware *pmufw; + +static void gk20a_pmu_isr(struct nvkm_pmu *ppmu); +static void pmu_process_message(struct work_struct *work); + +static int +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw); +static void +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw); + +static int +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw); +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu); +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc); +static void gk20a_pmu_intr(struct nvkm_subdev *subdev); +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable); struct gk20a_pmu_dvfs_data { int p_load_target; int p_load_max; int p_smooth; unsigned int avg_load; }; - struct gk20a_pmu_priv { struct nvkm_pmu base; struct nvkm_alarm alarm; @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status { unsigned long busy; int 
cur_state;
 };
-
+int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu)
+{
+	struct dentry *d;
+	ppmu->debugfs = debugfs_create_dir("PMU", NULL);
+	if (!ppmu->debugfs)
+		goto err_out;
+	nv_debug(ppmu, "PMU directory created with success\n");
+	d = debugfs_create_file(
+		"falc_trace", 0644, ppmu->debugfs, ppmu,
+						&falc_trace_fops);
+	if (!d)
+		goto err_out;
+	return 0;
+err_out:
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	debugfs_remove_recursive(ppmu->debugfs);
+	return -ENOMEM;
+}
+void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu,
+						    const struct firmware *pfw)
+{
+	nv_debug(ppmu, "firmware released\n");
+	release_firmware(pfw);
+}
 static int
 gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state)
 {
@@ -164,31 +233,187 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend)
 {
 	struct nvkm_pmu *pmu = (void *)object;
 	struct gk20a_pmu_priv *priv = (void *)pmu;
-
+	nv_wr32(pmu, 0x10a014, 0x00000060);
+	flush_work(&pmu->recv.work);
 	nvkm_timer_alarm_cancel(priv, &priv->alarm);

 	return nvkm_subdev_fini(&pmu->base, suspend);
 }
+static bool find_hex_in_string(char *strings, u32 *hex_pos)
+{
+	u32 i = 0, j = strlen(strings);
+	for (; i < j; i++) {
+		if (strings[i] == '%')
+			if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
+				*hex_pos = i;
+				return true;
+			}
+	}
+	*hex_pos = -1;
+	return false;
+}
+/*
+ * debugfs "falc_trace" show callback: snapshot the PMU trace buffer into
+ * kernel memory and print it record by record.
+ *
+ * Records are 0x40 bytes apart.  Word 0 of a record is printed as its
+ * index; the text starting at byte 20 is printed with each "%x"/"%X"
+ * marker replaced by the next word following the index.  Output stops at
+ * the first record from which the next 0x40 words (clamped to the end of
+ * the buffer) are all zero.
+ *
+ * Returns 0, or -ENOMEM if the snapshot buffer cannot be allocated.
+ */
+static int falc_trace_show(struct seq_file *s, void *data)
+{
+	struct nvkm_pmu *ppmu = s->private;
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	const u32 nwords = GK20A_PMU_TRACE_BUFSIZE / sizeof(u32);
+	u32 i = 0, j = 0, k, l, m, span;
+	char part_str[40];
+	u32 data1;
+	char *log_data, *trace;
+	u32 *trace1;
+
+	/* one spare byte keeps the "%s" prints below NUL-bounded */
+	log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE + 1, GFP_KERNEL);
+	if (!log_data)
+		return -ENOMEM;
+	log_data[GK20A_PMU_TRACE_BUFSIZE] = '\0';
+	trace = log_data;
+	trace1 = (u32 *)log_data;
+
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) {
+		data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i);
+		/* copy only the word just read; 32 overran data1 */
+		memcpy(log_data + i, &data1, sizeof(data1));
+	}
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
+		/* clamp the zero scan to the end of the snapshot */
+		span = min_t(u32, 0x40, nwords - (i / 4));
+		for (j = 0; j < span; j++)
+			if (trace1[(i / 4) + j])
+				break;
+		if (j == span)
+			goto out;
+		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
+		l = 0;
+		m = 0;
+		while (find_hex_in_string((trace+i+20+m), &k)) {
+			if (k >= 40)
+				break;
+			strncpy(part_str, (trace+i+20+m), k);
+			part_str[k] = 0;
+			seq_printf(s, "%s0x%x", part_str,
+					trace1[(i / 4) + 1 + l]);
+			l++;
+			m += k + 2;
+		}
+		seq_printf(s, "%s", (trace+i+20+m));
+	}
+out:
+	kfree(log_data);
+	return 0;
+}

+/*
+ * Bring up the gk20a PMU: load the firmware image (or reuse the cached
+ * one), check its version, run the software and hardware setup steps,
+ * then program the perf counters and arm the timer alarm.
+ *
+ * Returns 0 on success or a negative errno.  The firmware image is kept
+ * on success and released on every failure after it was loaded.
+ */
 int
 gk20a_pmu_init(struct nvkm_object *object)
 {
-	struct nvkm_pmu *pmu = (void *)object;
-	struct gk20a_pmu_priv *priv = (void *)pmu;
+	struct nvkm_pmu *ppmu = (void *)object;
+	struct nvkm_mc *pmc = nvkm_mc(object);
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu;
+	struct gk20a_pmu_priv *priv;
+	struct pmu_gk20a_data *gk20adata;
 	int ret;

-	ret = nvkm_subdev_init(&pmu->base);
+	pmu = &impl->pmudata;
+
+	nv_subdev(ppmu)->intr = gk20a_pmu_intr;
+
+	mutex_init(&pmu->isr_mutex);
+	mutex_init(&pmu->pmu_copy_lock);
+	mutex_init(&pmu->pmu_seq_lock);
+
+	if (pmufw == NULL) {
+		ret = gk20a_pmu_load_firmware(ppmu, &pmufw);
+		if (ret < 0) {
+			nv_error(ppmu, "failed to load pmu fimware\n");
+			return ret;
+		}
+		nv_debug(ppmu, "loading firmware sucessful\n");
+		ret = gk20a_pmu_init_vm(ppmu, pmufw);
+		if (ret < 0) {
+			nv_error(ppmu, "failed to map pmu fw to va space\n");
+			goto init_vm_err;
+		}
+	}
+	pmu->desc = (struct pmu_ucode_desc *)pmufw->data;
+	gk20a_pmu_dump_firmware_info(ppmu, pmufw);
+
+	if (pmu->desc->app_version != APP_VERSION_GK20A) {
+		nv_error(ppmu,
+		"PMU code version not supported version: %d\n",
+			pmu->desc->app_version);
+		ret = -EINVAL;
+		goto app_ver_err;
+	}
+	gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL);
+	if (!gk20adata) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	pmu->pmu_chip_data = (void *)gk20adata;
+
+	pmu->remove_support = gk20a_remove_pmu_support;
+
+	ret = gk20a_init_pmu_setup_sw(ppmu);
 	if (ret)
-		return ret;
+		goto err;
+
+	pmu->pmu_state = PMU_STATE_STARTING;
+	ret = gk20a_init_pmu_setup_hw1(ppmu, pmc);
+	if (ret)
+		goto err;
+
+	priv = (void *)ppmu;

-	pmu->pgob = nvkm_pmu_pgob;
+	ret = nvkm_subdev_init(&ppmu->base);
+	if (ret)
+		goto err;
+
+	ppmu->pgob = nvkm_pmu_pgob;

-	/* init pwr perf counter */
-	nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
-	nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
-	nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
+	/* init pmu perf counter */
+	nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
+	nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
+	nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);

-	nvkm_timer_alarm(pmu, 2000000000, &priv->alarm);
+	nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm);
+
+	/*
+	 * Success: keep the firmware image.  pmu->desc points into it and
+	 * the cached pmufw pointer lets the next init skip the load.
+	 * NOTE(review): confirm that the teardown path releases it.
+	 */
+	return 0;
+
+err:
+init_vm_err:
+app_ver_err:
+	gk20a_pmu_release_firmware(ppmu, pmufw);
+	/* forget the freed image so that a later init reloads it */
+	pmufw = NULL;
 	return ret;
 }

@@ -226,4 +451,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) {
 		.init = gk20a_pmu_init,
 		.fini = gk20a_pmu_fini,
 	},
+	.base.handle = NV_SUBDEV(PMU, 0xea),
+	.pgob = gk20a_pmu_pgob,
 }.base;
+void pmu_copy_from_dmem(struct pmu_desc *pmu,
+		u32 src, u8 *dst, u32 size, u8 port)
+{
+	u32 i, words, bytes;
+	u32 data, addr_mask;
+	u32 *dst_u32 = (u32 *)dst;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (size == 0) {
+		nv_error(ppmu, "size is zero\n");
+		goto out;
+	}
+
+	if (src & 0x3) {
+		nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src);
+		goto out;
+	}
+
+	mutex_lock(&pmu->pmu_copy_lock);
+
+	words = size >> 2;
+	bytes = size & 0x3;
+
+	addr_mask = (0x3f << 2) | 0xff << 8;
+
+	src &= addr_mask;
+
+	nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25)));
+
+	for (i = 0; i < words; i++) {
+		dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
+		nv_debug(ppmu, "0x%08x\n", dst_u32[i]);
+	}
+	if (bytes > 0) {
+		data = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
+		nv_debug(ppmu, "0x%08x\n", data);
+
+		for (i = 0; i < bytes; i++)
+			dst[(words << 2) + i] = ((u8 *)&data)[i];
+	}
+	mutex_unlock(&pmu->pmu_copy_lock);
+out:
+	nv_debug(ppmu, "exit %s\n", __func__);
+}
+
+void 
pmu_copy_to_dmem(struct pmu_desc *pmu, + u32 dst, u8 *src, u32 size, u8 port) +{ + u32 i, words, bytes; + u32 data, addr_mask; + u32 *src_u32 = (u32 *)src; + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + + if (size == 0) { + nv_error(ppmu, "size is zero\n"); + goto out; + } + + if (dst & 0x3) { + nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst); + goto out; + } + + mutex_lock(&pmu->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = (0x3f << 2) | 0xff << 8; + + dst &= addr_mask; + + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); + + for (i = 0; i < words; i++) { + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]); + nv_debug(ppmu, "0x%08x\n", src_u32[i]); + } + if (bytes > 0) { + data = 0; + for (i = 0; i < bytes; i++) + ((u8 *)&data)[i] = src[(words << 2) + i]; + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data); + nv_debug(ppmu, "0x%08x\n", data); + } + + data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask; + size = ALIGN(size, 4); + if (data != dst + size) { + nv_error(ppmu, "copy failed. 
bytes written %d, expected %d", + data - dst, size); + } + mutex_unlock(&pmu->pmu_copy_lock); +out: + nv_debug(ppmu, "exit %s", __func__); +} + +static int pmu_idle(struct nvkm_pmu *ppmu) +{ + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(2000); + u32 idle_stat; + + /* wait for pmu idle */ + do { + idle_stat = nv_rd32(ppmu, 0x0010a04c); + + if (((idle_stat & 0x01) == 0) && + ((idle_stat >> 1) & 0x7fff) == 0) { + break; + } + + if (time_after_eq(jiffies, end_jiffies)) { + nv_error(ppmu, "timeout waiting pmu idle : 0x%08x", + idle_stat); + return -EBUSY; + } + usleep_range(100, 200); + } while (1); + + return 0; +} + +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, + bool enable) +{ + + nv_wr32(pmc, 0x00000640, + nv_rd32(pmc, 0x00000640) & + ~0x1000000); + nv_wr32(pmc, 0x00000644, + nv_rd32(pmc, 0x00000644) & + ~0x1000000); + nv_wr32(ppmu, 0x0010a014, 0xff); + + if (enable) { + nv_debug(ppmu, "enable pmu irq\n"); + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 + nv_wr32(ppmu, 0x0010a01c, 0xff01ff52); + 0=disable, 1=enable*/ + + nv_wr32(ppmu, 0x0010a010, 0xff); + nv_wr32(pmc, 0x00000640, + nv_rd32(pmc, 0x00000640) | + 0x1000000); + nv_wr32(pmc, 0x00000644, + nv_rd32(pmc, 0x00000644) | + 0x1000000); + } else { + nv_debug(ppmu, "disable pmu irq\n"); + } + +} + +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, + bool enable) +{ + u32 reg; + + if (enable) { + int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT; + /*need a spinlock?*/ + reg = nv_rd32(pmc, 0x00000200); + reg |= 0x2000; + nv_wr32(pmc, 0x00000200, reg); + nv_rd32(pmc, 0x00000200); + do { + u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6; + + if (!w) + return 0; + + udelay(GK20A_IDLE_CHECK_DEFAULT); + } while (--retries); + + reg = nv_rd32(pmc, 0x00000200); + reg &= ~0x2000; + nv_wr32(pmc, 0x00000200, reg); + nv_error(ppmu, "Falcon mem scrubbing timeout\n"); + + goto error; + } else { + reg = nv_rd32(pmc, 0x00000200); + reg &= ~0x2000; + nv_wr32(pmc, 
0x00000200, reg);
+		return 0;
+	}
+error:
+	return -ETIMEDOUT;
+}
+
+static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
+			bool enable)
+{
+	u32 pmc_enable;
+	int err;
+
+	if (!enable) {
+		pmc_enable = nv_rd32(pmc, 0x200);
+		if ((pmc_enable & 0x2000) != 0x0) {
+			pmu_enable_irq(ppmu, pmc, false);
+			pmu_enable_hw(ppmu, pmc, false);
+		}
+	} else {
+		err = pmu_enable_hw(ppmu, pmc, true);
+		if (err)
+			return err;
+
+		/* TBD: post reset */
+
+		err = pmu_idle(ppmu);
+		if (err)
+			return err;
+
+		pmu_enable_irq(ppmu, pmc, true);
+	}
+
+	return 0;
+}
+
+int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
+{
+	int err;
+
+	err = pmu_idle(ppmu);
+	if (err)
+		return err;
+
+	/* TBD: release pmu hw mutex */
+
+	err = pmu_enable(ppmu, pmc, false);
+	if (err)
+		return err;
+
+	err = pmu_enable(ppmu, pmc, true);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int pmu_bootstrap(struct pmu_desc *pmu)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_ucode_desc *desc = pmu->desc;
+	u64 addr_code, addr_data, addr_load;
+	u32 i, blocks, addr_args;
+	u32 *adr_data, *adr_load, *adr_code;
+	struct pmu_cmdline_args_gk20a cmdline_args;
+	struct pmu_priv_vm *ppmuvm = &pmuvm;
+
+	nv_wr32(ppmu, 0x0010a048,
+		nv_rd32(ppmu, 0x0010a048) | 0x01);
+	/* bind the address of pmuvm.mem, in 4 KiB units (>> 12) */
+	nv_wr32(ppmu, 0x0010a480,
+		ppmuvm->mem->addr >> 12 |
+		0x1 << 30 |
+		0x20000000);
+
+	/* TBD: load all other surfaces */
+	cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE;
+	cmdline_args.falc_trace_dma_base =	/* in 256-byte units (>> 8) */
+		u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8);
+	cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT;
+	cmdline_args.cpu_freq_hz = 204;
+	cmdline_args.secure_mode = 0;
+
+	addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff;
+	addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2;
+	addr_args -= sizeof(struct pmu_cmdline_args_gk20a);
+	nv_debug(ppmu, "initiating copy to dmem\n");
+	pmu_copy_to_dmem(pmu, addr_args,
+			(u8 *)&cmdline_args,
+			sizeof(struct 
pmu_cmdline_args_gk20a), 0); + + nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24); + + + addr_code = u64_lo32((pmu->ucode.pmubufvma.offset + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8); + + addr_data = u64_lo32((pmu->ucode.pmubufvma.offset + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + + addr_load = u64_lo32((pmu->ucode.pmubufvma.offset + + desc->bootloader_start_offset) >> 8); + + adr_code = (u32 *) (&addr_code); + adr_load = (u32 *) (&addr_load); + adr_data = (u32 *) (&addr_data); + nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); + nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); + nv_debug(ppmu, "0x%08x\n", *(adr_code)); + nv_wr32(ppmu, 0x0010a1c4, desc->app_size); + nv_debug(ppmu, "0x%08x\n", desc->app_size); + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size); + nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size); + nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry); + nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry); + nv_wr32(ppmu, 0x0010a1c4, *(adr_data)); + nv_debug(ppmu, "0x%08x\n", *(adr_data)); + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size); + nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size); + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); + nv_debug(ppmu, "0x%08x\n", *(adr_code)); + nv_wr32(ppmu, 0x0010a1c4, 0x1); + nv_debug(ppmu, "0x%08x\n", 1); + nv_wr32(ppmu, 0x0010a1c4, addr_args); + nv_debug(ppmu, "0x%08x\n", addr_args); + + + nv_wr32(ppmu, 0x0010a110, + *(adr_load) - (desc->bootloader_imem_offset >> 8)); + + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; + + for (i = 0; i < blocks; i++) { + nv_wr32(ppmu, 0x0010a114, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(ppmu, 0x0010a11c, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(ppmu, 0x0010a118, + 0x01 << 4 | + 0x06 << 8 | + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); + } + + + nv_wr32(ppmu, 0x0010a104, + (0xffffffff & desc->bootloader_entry_point)); + + nv_wr32(ppmu, 
0x0010a100, 0x1 << 1); + + nv_wr32(ppmu, 0x0010a080, desc->app_version); + + return 0; +} + +void pmu_seq_init(struct pmu_desc *pmu) +{ + u32 i; + + memset(pmu->seq, 0, + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); + memset(pmu->pmu_seq_tbl, 0, + sizeof(pmu->pmu_seq_tbl)); + + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) + pmu->seq[i].id = i; +} + +static int pmu_seq_acquire(struct pmu_desc *pmu, + struct pmu_sequence **pseq) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_sequence *seq; + u32 index; + + mutex_lock(&pmu->pmu_seq_lock); + index = find_first_zero_bit(pmu->pmu_seq_tbl, + sizeof(pmu->pmu_seq_tbl)); + if (index >= sizeof(pmu->pmu_seq_tbl)) { + nv_error(ppmu, + "no free sequence available"); + mutex_unlock(&pmu->pmu_seq_lock); + return -EAGAIN; + } + set_bit(index, pmu->pmu_seq_tbl); + mutex_unlock(&pmu->pmu_seq_lock); + + seq = &pmu->seq[index]; + seq->state = PMU_SEQ_STATE_PENDING; + + *pseq = seq; + return 0; +} + +static void pmu_seq_release(struct pmu_desc *pmu, + struct pmu_sequence *seq) +{ + seq->state = PMU_SEQ_STATE_FREE; + seq->desc = PMU_INVALID_SEQ_DESC; + seq->callback = NULL; + seq->cb_params = NULL; + seq->msg = NULL; + seq->out_payload = NULL; + seq->in_gk20a.alloc.dmem.size = 0; + seq->out_gk20a.alloc.dmem.size = 0; + clear_bit(seq->id, pmu->pmu_seq_tbl); +} + +static int pmu_queue_init(struct pmu_desc *pmu, + u32 id, struct pmu_init_msg_pmu_gk20a *init) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_queue *queue = &pmu->queue[id]; + + queue->id = id; + queue->index = init->queue_info[id].index; + queue->offset = init->queue_info[id].offset; + queue->size = init->queue_info[id].size; + queue->mutex_id = id; + mutex_init(&queue->mutex); + + nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x", + id, queue->index, queue->offset, queue->size); + + return 0; +} + +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue, 
+ u32 *head, bool set) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + + BUG_ON(!head); + + if (PMU_IS_COMMAND_QUEUE(queue->id)) { + + if (queue->index >= 0x00000004) + return -EINVAL; + + if (!set) + *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) & + 0xffffffff; + else + nv_wr32(ppmu, + (0x0010a4a0 + (queue->index * 4)), + (*head & 0xffffffff)); + } else { + if (!set) + *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff; + else + nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff)); + } + + return 0; +} + +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue, + u32 *tail, bool set) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + + BUG_ON(!tail); + + if (PMU_IS_COMMAND_QUEUE(queue->id)) { + + if (queue->index >= 0x00000004) + return -EINVAL; + + if (!set) + *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) & + 0xffffffff; + else + nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)), + (*tail & 0xffffffff)); + } else { + if (!set) + *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; + else + nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff)); + } + + return 0; +} + +static inline void pmu_queue_read(struct pmu_desc *pmu, + u32 offset, u8 *dst, u32 size) +{ + pmu_copy_from_dmem(pmu, offset, dst, size, 0); +} + +static inline void pmu_queue_write(struct pmu_desc *pmu, + u32 offset, u8 *src, u32 size) +{ + pmu_copy_to_dmem(pmu, offset, src, size, 0); +} + +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + struct pmu_mutex *mutex; + u32 data, owner, max_retry; + + if (!pmu->initialized) + return -EINVAL; + + BUG_ON(!token); + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); + BUG_ON(id > pmu->mutex_cnt); + + mutex = &pmu->mutex[id]; + + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; + + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { + BUG_ON(mutex->ref_cnt == 
0); + nv_debug(ppmu, "already acquired by owner : 0x%08x", *token); + mutex->ref_cnt++; + return 0; + } + + max_retry = 40; + do { + data = nv_rd32(ppmu, 0x0010a488) & 0xff; + if (data == 0x00000000 || + data == 0x000000ff) { + nv_warn(ppmu, + "fail to generate mutex token: val 0x%08x", + owner); + usleep_range(20, 40); + continue; + } + + owner = data; + nv_wr32(ppmu, (0x0010a580 + mutex->index * 4), + owner & 0xff); + + data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)); + + if (owner == data) { + mutex->ref_cnt = 1; + nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x", + mutex->index, *token); + *token = owner; + goto out; + } else { + nv_debug(ppmu, "fail to acquire mutex idx=0x%08x", + mutex->index); + + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); + + usleep_range(20, 40); + continue; + } + } while (max_retry-- > 0); + + return -EBUSY; +out: + return 0; +} + +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + struct pmu_mutex *mutex; + u32 owner; + + if (!pmu->initialized) + return -EINVAL; + + BUG_ON(!token); + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); + BUG_ON(id > pmu->mutex_cnt); + + mutex = &pmu->mutex[id]; + + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; + + if (*token != owner) { + nv_error(ppmu, + "requester 0x%08x NOT match owner 0x%08x", + *token, owner); + return -EINVAL; + } + + if (--mutex->ref_cnt > 0) + return -EBUSY; + + nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00); + + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); + + nv_debug(ppmu, "mutex released: id=%d, token=0x%x", + mutex->index, *token); + + return 0; +} + +static int pmu_queue_lock(struct pmu_desc *pmu, + struct pmu_queue *queue) +{ + int ret; + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + + if (PMU_IS_MESSAGE_QUEUE(queue->id)) + return 0; + + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { + mutex_lock(&queue->mutex); 
+		return 0;
+	}
+
+	ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock);
+	return ret;
+}
+
+static int pmu_queue_unlock(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	int ret;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (PMU_IS_MESSAGE_QUEUE(queue->id))
+		return 0;
+
+	if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
+		mutex_unlock(&queue->mutex);
+		return 0;
+	}
+
+	ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock);
+	return ret;
+}
+
+/* called by pmu_read_message, no lock */
+static bool pmu_queue_is_empty(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	u32 head, tail;
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	if (queue->opened && queue->oflag == OFLAG_READ)
+		tail = queue->position;
+	else
+		pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
+	return head == tail;
+}
+
+static bool pmu_queue_has_room(struct pmu_desc *pmu,
+			struct pmu_queue *queue, u32 size, bool *need_rewind)
+{
+	u32 head, tail, free;
+	bool rewind = false;
+
+	size = ALIGN(size, QUEUE_ALIGNMENT);
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
+	if (head >= tail) {
+		free = queue->offset + queue->size - head;
+		free -= PMU_CMD_HDR_SIZE;
+
+		if (size > free) {
+			rewind = true;
+			head = queue->offset;
+		}
+	}
+
+	if (head < tail)
+		free = tail - head - 1;
+
+	if (need_rewind)
+		*need_rewind = rewind;
+
+	return size <= free;
+}
+
+static int pmu_queue_push(struct pmu_desc *pmu,
+			struct pmu_queue *queue, void *data, u32 size)
+{
+	/* write @size bytes at queue->position, then advance it (aligned) */
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	if (!queue->opened || queue->oflag != OFLAG_WRITE) {
+		nv_error(ppmu, "queue not opened for write\n");
+		return -EINVAL;
+	}
+
+	pmu_queue_write(pmu, queue->position, data, size);
+	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+	return 0;
+}
+
+static int pmu_queue_pop(struct pmu_desc *pmu,
+			struct pmu_queue *queue, void *data, u32 size,
+			u32 *bytes_read)
+{
+	u32 head, tail, used;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	*bytes_read = 0;
+	/* the queue must be open, and open for reading */
+	if (!queue->opened || queue->oflag != OFLAG_READ) {
+		nv_error(ppmu, "queue not opened for read\n");
+		return -EINVAL;
+	}
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	tail = queue->position;
+
+	if (head == tail)
+		return 0;
+
+	if (head > tail)
+		used = head - tail;
+	else
+		used = queue->offset + queue->size - tail;
+
+	if (size > used) {
+		nv_warn(ppmu, "queue size smaller than request read\n");
+		size = used;
+	}
+
+	pmu_queue_read(pmu, tail, data, size);
+	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+	*bytes_read = size;
+	return 0;
+}
+
+static void pmu_queue_rewind(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	struct pmu_cmd cmd;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+
+	if (!queue->opened) {
+		nv_error(ppmu, "queue not opened\n");
+		goto out;
+	}
+
+	if (queue->oflag == OFLAG_WRITE) {
+		cmd.hdr.unit_id = PMU_UNIT_REWIND;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE;
+		pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
+		nv_debug(ppmu, "queue %d rewinded\n", queue->id);
+	}
+
+	queue->position = queue->offset;
+out:
+	nv_debug(ppmu, "exit %s\n", __func__);
+}
+
+/* open for read and lock the queue */
+static int pmu_queue_open_read(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	int err;
+
+	err = pmu_queue_lock(pmu, queue);
+	if (err)
+		return err;
+
+	if (queue->opened)
+		BUG();
+
+	pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
+	queue->oflag = OFLAG_READ;
+	queue->opened = true;
+
+	return 0;
+}
+
+/* open for write and lock the queue
+   make sure there's enough free space for the write */
+static int pmu_queue_open_write(struct pmu_desc *pmu,
+			struct pmu_queue *queue, u32 size)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	bool rewind = false;
+	int err;
+
+	err = pmu_queue_lock(pmu, queue);
+	if (err)
+		return err;
+
+	
if (queue->opened) + BUG(); + + if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { + nv_error(ppmu, "queue full"); + pmu_queue_unlock(pmu, queue); + return -EAGAIN; + } + + pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); + queue->oflag = OFLAG_WRITE; + queue->opened = true; + + if (rewind) + pmu_queue_rewind(pmu, queue); + + return 0; +} + +/* close and unlock the queue */ +static int pmu_queue_close(struct pmu_desc *pmu, + struct pmu_queue *queue, bool commit) +{ + if (!queue->opened) + return 0; + + if (commit) { + if (queue->oflag == OFLAG_READ) { + pmu_queue_tail(pmu, queue, + &queue->position, QUEUE_SET); + } else { + pmu_queue_head(pmu, queue, + &queue->position, QUEUE_SET); + } + } + + queue->opened = false; + + pmu_queue_unlock(pmu, queue); + + return 0; +} + +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, + u32 *var, u32 val) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + unsigned long delay = GK20A_IDLE_CHECK_DEFAULT; + + do { + if (*var == val) + return 0; + + if (nv_rd32(ppmu, 0x0010a008)) + gk20a_pmu_isr(ppmu); + + usleep_range(delay, delay * 2); + delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX); + } while (time_before(jiffies, end_jiffies)); + + return -ETIMEDOUT; +} + +void pmu_dump_falcon_stats(struct pmu_desc *pmu) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + int i; + + nv_debug(ppmu, "pmu_falcon_os_r : %d\n", + nv_rd32(ppmu, 0x0010a080)); + nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a100)); + nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a04c)); + nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a040)); + nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a044)); + nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a008)); + nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n", + 
nv_rd32(ppmu, 0x0010a00c)); + nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a018)); + nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a01c)); + + for (i = 0; i < 0x0000000c; i++) + nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n", + i, nv_rd32(ppmu, 0x0010a450 + i*4)); + + for (i = 0; i < 0x00000004; i++) + nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n", + i, nv_rd32(ppmu, 0x0010a5c0 + i*4)); + + for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { + nv_wr32(ppmu, 0x0010a200, + 0xe | + (i & 0x1f) << 8); + nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n", + i, nv_rd32(ppmu, 0x0010a20c)); + } + + i = nv_rd32(ppmu, 0x0010a7b0); + nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i); + if (i != 0) { + nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a7a0)); + nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a7a4)); + nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a7a8)); + nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a7ac)); + } + + i = nv_rd32(ppmu, 0x0010a988); + nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i); + + i = nv_rd32(ppmu, 0x0010a16c); + nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i); + if (((i >> 31) & 0x1)) { + nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a168)); + /*nv_debug(ppmu, "pmc_enable : 0x%x\n", + nv_rd32(pmc, 0x00000200));*/ + } + + nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a0a4)); + nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a050)); + nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n", + nv_rd32(ppmu, 0x0010a054)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_IMB & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_DMB & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n", + nv_rd32(ppmu, 
0x0010a20c)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_CSW & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_CTX & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_EXCI & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + + for (i = 0; i < 4; i++) { + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_PC & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + + nv_wr32(ppmu, 0x0010a200, + 0x8 | + ((PMU_FALCON_REG_SP & 0x1f) << 8)); + nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n", + nv_rd32(ppmu, 0x0010a20c)); + } + + /* PMU may crash due to FECS crash. Dump FECS status */ + /*gk20a_fecs_dump_falcon_stats(g);*/ +} + +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, + struct pmu_msg *msg, struct pmu_payload *payload, + u32 queue_id) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_queue *queue; + u32 in_size, out_size; + + nv_debug(ppmu, "pmu validate cmd\n"); + pmu_dump_falcon_stats(pmu); + + if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) + goto invalid_cmd; + + queue = &pmu->queue[queue_id]; + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) + goto invalid_cmd; + + if (cmd->hdr.size > (queue->size >> 1)) + goto invalid_cmd; + + if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) + goto invalid_cmd; + + if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) + goto invalid_cmd; + + if (payload == NULL) + return true; + + if (payload->in.buf == NULL && payload->out.buf == NULL) + goto invalid_cmd; + + if ((payload->in.buf != NULL && payload->in.size == 0) || + (payload->out.buf != NULL && payload->out.size == 0)) + goto invalid_cmd; + + in_size = PMU_CMD_HDR_SIZE; + if (payload->in.buf) { + in_size += payload->in.offset; + in_size += 
sizeof(struct pmu_allocation_gk20a); + } + + out_size = PMU_CMD_HDR_SIZE; + if (payload->out.buf) { + out_size += payload->out.offset; + out_size += sizeof(struct pmu_allocation_gk20a); + } + + if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) + goto invalid_cmd; + + + if ((payload->in.offset != 0 && payload->in.buf == NULL) || + (payload->out.offset != 0 && payload->out.buf == NULL)) + goto invalid_cmd; + + return true; + +invalid_cmd: + nv_error(ppmu, "invalid pmu cmd :\n" + "queue_id=%d,\n" + "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" + "payload in=%p, in_size=%d, in_offset=%d,\n" + "payload out=%p, out_size=%d, out_offset=%d", + queue_id, cmd->hdr.size, cmd->hdr.unit_id, + msg, msg ? msg->hdr.unit_id : ~0, + &payload->in, payload->in.size, payload->in.offset, + &payload->out, payload->out.size, payload->out.offset); + + return false; +} + +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, + u32 queue_id, unsigned long timeout) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_queue *queue; + unsigned long end_jiffies = jiffies + + msecs_to_jiffies(timeout); + int err; + + nv_debug(ppmu, "pmu write cmd\n"); + + queue = &pmu->queue[queue_id]; + + do { + err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); + if (err == -EAGAIN && time_before(jiffies, end_jiffies)) + usleep_range(1000, 2000); + else + break; + } while (1); + + if (err) + goto clean_up; + + pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); + + err = pmu_queue_close(pmu, queue, true); + +clean_up: + if (err) + nv_error(ppmu, + "fail to write cmd to queue %d", queue_id); + else + nv_debug(ppmu, "cmd writing done"); + + return err; +} + +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd, + struct pmu_msg *msg, struct pmu_payload *payload, + u32 queue_id, pmu_callback callback, void *cb_param, + u32 *seq_desc, unsigned long timeout) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc 
*pmu = &impl->pmudata; + struct pmu_sequence *seq; + struct pmu_allocation_gk20a *in = NULL, *out = NULL; + int err; + + BUG_ON(!cmd); + BUG_ON(!seq_desc); + BUG_ON(!pmu->pmu_ready); + nv_debug(ppmu, "Post CMD\n"); + if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) + return -EINVAL; + + err = pmu_seq_acquire(pmu, &seq); + if (err) + return err; + + cmd->hdr.seq_id = seq->id; + + cmd->hdr.ctrl_flags = 0; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; + + seq->callback = callback; + seq->cb_params = cb_param; + seq->msg = msg; + seq->out_payload = NULL; + seq->desc = pmu->next_seq_desc++; + + if (payload) + seq->out_payload = payload->out.buf; + + *seq_desc = seq->desc; + + if (payload && payload->in.offset != 0) { + in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + + payload->in.offset); + + if (payload->in.buf != payload->out.buf) + in->alloc.dmem.size = (u16)payload->in.size; + else + in->alloc.dmem.size = + (u16)max(payload->in.size, payload->out.size); + + err = pmu->dmem.alloc(&pmu->dmem, + (void *)&in->alloc.dmem.offset, + in->alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + if (err) + goto clean_up; + + pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset), + payload->in.buf, payload->in.size, 0); + seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size; + seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset; + } + + if (payload && payload->out.offset != 0) { + out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + + payload->out.offset); + out->alloc.dmem.size = (u16)payload->out.size; + + if (payload->out.buf != payload->in.buf) { + err = pmu->dmem.alloc(&pmu->dmem, + (void *)&out->alloc.dmem.offset, + out->alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + if (err) + goto clean_up; + } else { + BUG_ON(in == NULL); + out->alloc.dmem.offset = in->alloc.dmem.offset; + } + + seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size; + seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset; + } + + seq->state =
PMU_SEQ_STATE_USED; + err = pmu_write_cmd(pmu, cmd, queue_id, timeout); + if (err) + seq->state = PMU_SEQ_STATE_PENDING; + + nv_debug(ppmu, "cmd posted\n"); + + return 0; + +clean_up: + nv_debug(ppmu, "cmd post failed\n"); + if (in) + pmu->dmem.free(&pmu->dmem, + in->alloc.dmem.offset, + in->alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + if (out) + pmu->dmem.free(&pmu->dmem, + out->alloc.dmem.offset, + out->alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + + pmu_seq_release(pmu, seq); + return err; +} + +void gk20a_pmu_isr(struct nvkm_pmu *ppmu) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + struct nvkm_mc *pmc = nvkm_mc(ppmu); + struct pmu_queue *queue; + u32 intr, mask; + bool recheck = false; + if (!pmu->isr_enabled) + goto out; + + mask = nv_rd32(ppmu, 0x0010a018) & + nv_rd32(ppmu, 0x0010a01c); + + intr = nv_rd32(ppmu, 0x0010a008) & mask; + + nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr); + pmu_enable_irq(ppmu, pmc, false); + if (!intr || pmu->pmu_state == PMU_STATE_OFF) { + nv_wr32(ppmu, 0x0010a004, intr); + nv_error(ppmu, "pmu state off\n"); + pmu_enable_irq(ppmu, pmc, true); + goto out; + } + if (intr & 0x10) { + nv_error(ppmu, + "pmu halt intr not implemented"); + pmu_dump_falcon_stats(pmu); + } + if (intr & 0x20) { + nv_error(ppmu, + "pmu exterr intr not implemented. 
Clearing interrupt."); + pmu_dump_falcon_stats(pmu); + + nv_wr32(ppmu, 0x0010a16c, + nv_rd32(ppmu, 0x0010a16c) & + ~(0x1 << 31)); + } + if (intr & 0x40) { + nv_debug(ppmu, "scheduling work\n"); + schedule_work(&pmu->isr_workq); + pmu_enable_irq(ppmu, pmc, true); + recheck = true; + } + + if (recheck) { + queue = &pmu->queue[PMU_MESSAGE_QUEUE]; + if (!pmu_queue_is_empty(pmu, queue)) + nv_wr32(ppmu, 0x0010a000, 0x40); + } else { + pmu_enable_irq(ppmu, pmc, true); + } + + pmu_enable_irq(ppmu, pmc, true); + nv_wr32(ppmu, 0x0010a004, intr); +out: + nv_debug(ppmu, "irq handled\n"); +} + +static int +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw) +{ + int ret = 0; + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + u32 *ucode_image; + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + int i; + struct pmu_priv_vm *ppmuvm = &pmuvm; + struct nvkm_device *device = nv_device(&ppmu->base); + struct nvkm_vm *vm; + u64 pmu_area_len = 300*1024; + + ppmu->pmuvm = &pmuvm; + ppmu->pg_buf = &pmu->pg_buf; + pmu->pmu = ppmu; + /* mem for inst blk*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0, + &ppmuvm->mem); + if (ret) + goto instblk_alloc_err; + + /* mem for pgd*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0, + &ppmuvm->pgd); + if (ret) + goto pgd_alloc_err; + + /*allocate virtual memory range*/ + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); + if (ret) + goto virt_alloc_err; + + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); + /*update VM with pgd */ + + ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd); + if (ret) + goto virt_alloc_err; + + /*update pgd in inst blk */ + nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr)); + nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr)); + nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1)); + nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); + + /* allocate memory for pmu fw to be 
copied to*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, + GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj); + if (ret) + goto fw_alloc_err; + + ucode_image = (u32 *)((u32)desc + desc->descriptor_size); + for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) { + nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]); + pr_info("writing 0x%08x\n", ucode_image[i]); + } + /* map allocated memory into GMMU */ + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm, + NV_MEM_ACCESS_RW, + &pmu->ucode.pmubufvma); + if (ret) + goto map_err; + + nv_debug(ppmu, "%s function end\n", __func__); + return ret; +map_err: + nvkm_gpuobj_destroy(pmu->ucode.pmubufobj); +virt_alloc_err: +fw_alloc_err: + nvkm_gpuobj_destroy(ppmuvm->pgd); +pgd_alloc_err: + nvkm_gpuobj_destroy(ppmuvm->mem); +instblk_alloc_err: + return ret; + +} + +static int +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw) +{ + struct nvkm_device *dev; + char name[32]; + + dev = nv_device(ppmu); + + snprintf(name, sizeof(name), "nvidia/tegra124/%s", + GK20A_PMU_UCODE_IMAGE); + + return request_firmware(pfw, name, nv_device_base(dev)); +} + +static void +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, + const struct firmware *fw) +{ + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + + nv_debug(ppmu, "GK20A PMU firmware information\n"); + nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size); + nv_debug(ppmu, "image size = %u\n", desc->image_size); + nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version); + nv_debug(ppmu, "date = %s\n", desc->date); + nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n", + desc->bootloader_start_offset); + nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size); + nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n", + desc->bootloader_imem_offset); + nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n", + desc->bootloader_entry_point); + nv_debug(ppmu, "app_start_offset = 0x%08x\n", 
desc->app_start_offset); + nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size); + nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); + nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); + nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); + nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n", + desc->app_resident_code_offset); + nv_debug(ppmu, "app_resident_code_size = 0x%08x\n", + desc->app_resident_code_size); + nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n", + desc->app_resident_data_offset); + nv_debug(ppmu, "app_resident_data_size = 0x%08x\n", + desc->app_resident_data_size); + nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays); + + nv_debug(ppmu, "compressed = %u\n", desc->compressed); +} + +static int pmu_process_init_msg(struct pmu_desc *pmu, + struct pmu_msg *msg) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_init_msg_pmu_gk20a *init; + struct pmu_sha1_gid_data gid_data; + u32 i, tail = 0; + + tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; + + pmu_copy_from_dmem(pmu, tail, + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); + + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + nv_error(ppmu, + "expecting init msg"); + return -EINVAL; + } + + pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); + + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + nv_error(ppmu, + "expecting init msg"); + return -EINVAL; + } + + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); + nv_wr32(ppmu, 0x0010a4cc, + tail & 0xffffffff); + + init = &msg->msg.init.pmu_init_gk20a; + if (!pmu->gid_info.valid) { + + pmu_copy_from_dmem(pmu, + init->sw_managed_area_offset, + (u8 *)&gid_data, + sizeof(struct pmu_sha1_gid_data), 0); + + pmu->gid_info.valid = + (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); + + if (pmu->gid_info.valid) { + + BUG_ON(sizeof(pmu->gid_info.gid) != + sizeof(gid_data.gid)); + + memcpy(pmu->gid_info.gid,
gid_data.gid, + sizeof(pmu->gid_info.gid)); + } + } + + for (i = 0; i < PMU_QUEUE_COUNT; i++) + pmu_queue_init(pmu, i, init); + + if (!pmu->dmem.alloc) + nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", + init->sw_managed_area_offset, + init->sw_managed_area_size); + + pmu->pmu_ready = true; + pmu->pmu_state = PMU_STATE_INIT_RECEIVED; + + return 0; +} + +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue, + struct pmu_msg *msg, int *status) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + u32 read_size, bytes_read; + int err; + + *status = 0; + + if (pmu_queue_is_empty(pmu, queue)) + return false; + + err = pmu_queue_open_read(pmu, queue); + if (err) { + nv_error(ppmu, + "fail to open queue %d for read", queue->id); + *status = err; + return false; + } + + err = pmu_queue_pop(pmu, queue, &msg->hdr, + PMU_MSG_HDR_SIZE, &bytes_read); + if (err || bytes_read != PMU_MSG_HDR_SIZE) { + nv_error(ppmu, + "fail to read msg from queue %d", queue->id); + *status = err | -EINVAL; + goto clean_up; + } + + if (msg->hdr.unit_id == PMU_UNIT_REWIND) { + pmu_queue_rewind(pmu, queue); + /* read again after rewind */ + err = pmu_queue_pop(pmu, queue, &msg->hdr, + PMU_MSG_HDR_SIZE, &bytes_read); + if (err || bytes_read != PMU_MSG_HDR_SIZE) { + nv_error(ppmu, + "fail to read msg from queue %d", queue->id); + *status = err | -EINVAL; + goto clean_up; + } + } + + if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { + nv_error(ppmu, + "read invalid unit_id %d from queue %d", + msg->hdr.unit_id, queue->id); + *status = -EINVAL; + goto clean_up; + } + + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { + read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; + err = pmu_queue_pop(pmu, queue, &msg->msg, + read_size, &bytes_read); + if (err || bytes_read != read_size) { + nv_error(ppmu, + "fail to read msg from queue %d", queue->id); + *status = err; + goto clean_up; + } + } + + err = pmu_queue_close(pmu, queue, true); + if (err) { + nv_error(ppmu, + "fail to 
close queue %d", queue->id); + *status = err; + return false; + } + + return true; + +clean_up: + err = pmu_queue_close(pmu, queue, false); + if (err) + nv_error(ppmu, + "fail to close queue %d", queue->id); + return false; +} + +static int pmu_response_handle(struct pmu_desc *pmu, + struct pmu_msg *msg) +{ + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct pmu_sequence *seq; + int ret = 0; + + nv_debug(ppmu, "handling pmu response\n"); + seq = &pmu->seq[msg->hdr.seq_id]; + if (seq->state != PMU_SEQ_STATE_USED && + seq->state != PMU_SEQ_STATE_CANCELLED) { + nv_error(ppmu, + "msg for an unknown sequence %d", seq->id); + return -EINVAL; + } + + if (msg->hdr.unit_id == PMU_UNIT_RC && + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { + nv_error(ppmu, + "unhandled cmd: seq %d", seq->id); + } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { + if (seq->msg) { + if (seq->msg->hdr.size >= msg->hdr.size) { + memcpy(seq->msg, msg, msg->hdr.size); + if (seq->out_gk20a.alloc.dmem.size != 0) { + pmu_copy_from_dmem(pmu, + seq->out_gk20a.alloc.dmem.offset, + seq->out_payload, + seq->out_gk20a.alloc.dmem.size, 0); + } + } else { + nv_error(ppmu, + "sequence %d msg buffer too small", + seq->id); + } + } + } else + seq->callback = NULL; + if (seq->in_gk20a.alloc.dmem.size != 0) + pmu->dmem.free(&pmu->dmem, + seq->in_gk20a.alloc.dmem.offset, + seq->in_gk20a.alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + if (seq->out_gk20a.alloc.dmem.size != 0) + pmu->dmem.free(&pmu->dmem, + seq->out_gk20a.alloc.dmem.offset, + seq->out_gk20a.alloc.dmem.size, + PMU_DMEM_ALLOC_ALIGNMENT); + + if (seq->callback) + seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret); + + pmu_seq_release(pmu, seq); + + /* TBD: notify client waiting for available dmem */ + nv_debug(ppmu, "pmu response processed\n"); + + return 0; +} + +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, + u32 *var, u32 val); + + +static int pmu_handle_event(struct pmu_desc *pmu, 
struct pmu_msg *msg) +{ + int err = 0; + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + + switch (msg->hdr.unit_id) { + case PMU_UNIT_PERFMON: + nv_debug(ppmu, "init perfmon event generated\n"); + break; + default: + nv_debug(ppmu, "default event generated\n"); + break; + } + + return err; +} + +void pmu_process_message(struct work_struct *work) +{ + struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq); + struct pmu_msg msg; + int status; + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) + impl_from_pmu(pmu)); + struct nvkm_mc *pmc = nvkm_mc(ppmu); + + mutex_lock(&pmu->isr_mutex); + if (unlikely(!pmu->pmu_ready)) { + nv_debug(ppmu, "processing init msg\n"); + pmu_process_init_msg(pmu, &msg); + mutex_unlock(&pmu->isr_mutex); + pmu_enable_irq(ppmu, pmc, true); + goto out; + } + + while (pmu_read_message(pmu, + &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { + + nv_debug(ppmu, "read msg hdr:\n" + "unit_id = 0x%08x, size = 0x%08x,\n" + "ctrl_flags = 0x%08x, seq_id = 0x%08x\n", + msg.hdr.unit_id, msg.hdr.size, + msg.hdr.ctrl_flags, msg.hdr.seq_id); + + msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK; + + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) + pmu_handle_event(pmu, &msg); + else + pmu_response_handle(pmu, &msg); + } + mutex_unlock(&pmu->isr_mutex); + pmu_enable_irq(ppmu, pmc, true); +out: + nv_debug(ppmu, "exit %s\n", __func__); +} + +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + + /* make sure the pending operations are finished before we continue */ + cancel_work_sync(&pmu->isr_workq); + pmu->initialized = false; + + mutex_lock(&pmu->isr_mutex); + pmu_enable(ppmu, pmc, false); + pmu->isr_enabled = false; + mutex_unlock(&pmu->isr_mutex); + + pmu->pmu_state = PMU_STATE_OFF; + pmu->pmu_ready = false; + pmu->zbc_ready = false; + + return 0; +} + +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 
*load) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + *load = pmu->load_shadow; + return 0; +} + +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + u16 _load = 0; + + pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); + pmu->load_shadow = _load / 10; + pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); + + return 0; +} + +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles, + u32 *total_cycles) +{ + /*todo if (!g->power_on || gk20a_busy(g->dev)) { + *busy_cycles = 0; + *total_cycles = 0; + return; + }*/ + + *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff; + /*todormb();*/ + *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff; + /*todogk20a_idle(g->dev);*/ +} + +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu) +{ + u32 reg_val = 1 << 31; + + /*todoif (!g->power_on || gk20a_busy(g->dev)) + return;*/ + + nv_wr32(ppmu, 0x0010a508 + 32, reg_val); + /*todowmb()*/; + nv_wr32(ppmu, 0x0010a508 + 16, reg_val); + /*todogk20a_idle(g->dev);*/ +} + +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); + struct pmu_desc *pmu = &impl->pmudata; + int err; + + mutex_lock(&pmu->isr_mutex); + pmu_reset(ppmu, pmc); + pmu->isr_enabled = true; + mutex_unlock(&pmu->isr_mutex); + + /* setup apertures - virtual */ + nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0); + nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0); + /* setup apertures - physical */ + nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0); + nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1); + nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2); + + /* TBD: load pmu ucode */ + err = pmu_bootstrap(pmu); + if (err) + return err; + + return 0; + +} + +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu) +{ + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); 
+ struct pmu_desc *pmu = &impl->pmudata; + struct pmu_priv_vm *ppmuvm = &pmuvm; + int i, err = 0; + int ret = 0; + + + if (pmu->sw_ready) { + + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + pmu_seq_init(pmu); + + nv_debug(ppmu, "skipping init\n"); + goto skip_init; + } + + /* no infoRom script from vbios? */ + + /* TBD: sysmon subtask */ + + pmu->mutex_cnt = 0x00000010; + pmu->mutex = kzalloc(pmu->mutex_cnt * + sizeof(struct pmu_mutex), GFP_KERNEL); + if (!pmu->mutex) { + err = -ENOMEM; + nv_error(ppmu, "not enough space ENOMEM\n"); + goto err; + } + + for (i = 0; i < pmu->mutex_cnt; i++) { + pmu->mutex[i].id = i; + pmu->mutex[i].index = i; + } + + pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * + sizeof(struct pmu_sequence), GFP_KERNEL); + if (!pmu->seq) { + err = -ENOMEM; + nv_error(ppmu, "not enough space ENOMEM\n"); + goto err_free_mutex; + } + + pmu_seq_init(pmu); + + INIT_WORK(&pmu->isr_workq, pmu_process_message); + init_waitqueue_head(&ppmu->init_wq); + ppmu->gr_initialised = false; + + /* allocate memory for pmu fw area */ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE, + 0x1000, 0, &pmu->seq_buf.pmubufobj); + if (ret) + return ret; + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE, + 0, 0, &pmu->trace_buf.pmubufobj); + if (ret) + return ret; + /* map allocated memory into GMMU */ + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj), + ppmuvm->vm, + NV_MEM_ACCESS_RW, + &pmu->seq_buf.pmubufvma); + if (ret) + return ret; + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj), + ppmuvm->vm, + NV_MEM_ACCESS_RW, + &pmu->trace_buf.pmubufvma); + if (ret) + return ret; + + /* TBD: remove this if ZBC save/restore is handled by PMU + * end an empty ZBC sequence for now */ + nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16); + nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00); + nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01); + nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00); + 
nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00); + nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00); + nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00); + nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00); + + pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; + ret = gk20a_pmu_debugfs_init(ppmu); + if (ret) + return ret; + + pmu->sw_ready = true; + +skip_init: + return 0; +err_free_mutex: + kfree(pmu->mutex); +err: + return err; +} + +static void +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable) +{ + /* + nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000); + nv_rd32(ppmu, 0x000200); + nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000); + + msleep(50); + + nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000); + nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000); + nv_rd32(ppmu, 0x000200); + */ +} + +static void gk20a_pmu_intr(struct nvkm_subdev *subdev) +{ + struct nvkm_pmu *ppmu = nvkm_pmu(subdev); + + gk20a_pmu_isr(ppmu); +} + +void gk20a_remove_pmu_support(struct pmu_desc *pmu) +{ + nvkm_pmu_allocator_destroy(&pmu->dmem); +} + +int gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2], + u32 process, u32 message, u32 data0, u32 data1) +{ + return -EPERM; +} + +int +gk20a_pmu_create_(struct nvkm_object *parent, + struct nvkm_object *engine, + struct nvkm_oclass *oclass, int length, void **pobject) +{ + struct nvkm_pmu *ppmu; + struct nvkm_device *device = nv_device(parent); + int ret; + + ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU", + "pmu", length, pobject); + ppmu = *pobject; + if (ret) + return ret; + + ret = nv_device_get_irq(device, true); + + ppmu->message = gk20a_message; + ppmu->pgob = gk20a_pmu_pgob; + ppmu->pmu_mutex_acquire = pmu_mutex_acquire; + ppmu->pmu_mutex_release = pmu_mutex_release; + ppmu->pmu_load_norm = gk20a_pmu_load_norm; + ppmu->pmu_load_update = gk20a_pmu_load_update; + ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters; + ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters; + + return 0; +} + + + diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h 
b/drm/nouveau/nvkm/subdev/pmu/gk20a.h new file mode 100644 index 000000000000..a084d6d518b4 --- /dev/null +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h @@ -0,0 +1,369 @@ +#ifndef __NVKM_pmu_GK20A_H__ +#define __NVKM_pmu_GK20A_H__ + +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +void pmu_setup_hw(struct pmu_desc *pmu); +void gk20a_remove_pmu_support(struct pmu_desc *pmu); +#define gk20a_pmu_create(p, e, o, d) \ + gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) + +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *, + struct nvkm_oclass *, int, void **); +/* defined by pmu hw spec */ +#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024) +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) +#define GK20A_PMU_SEQ_BUF_SIZE 4096 +/* idle timeout */ +#define GK20A_IDLE_CHECK_DEFAULT 100 /* usec */ +#define GK20A_IDLE_CHECK_MAX 5000 /* usec */ + +/* so far gk20a has two engines: gr and ce2(gr_copy) */ +enum { + ENGINE_GR_GK20A = 0, + ENGINE_CE2_GK20A = 1, + ENGINE_INVAL_GK20A +}; + +#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe) + +#define APP_VERSION_GK20A 17997577 + +enum { + GK20A_PMU_DMAIDX_UCODE = 0, + GK20A_PMU_DMAIDX_VIRT = 1, + GK20A_PMU_DMAIDX_PHYS_VID = 2, + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, + GK20A_PMU_DMAIDX_RSVD = 5, + GK20A_PMU_DMAIDX_PELPG = 6, + GK20A_PMU_DMAIDX_END = 7 +}; + +struct pmu_mem_gk20a { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; + u16 fb_size; +}; + +struct pmu_dmem { + u16 size; + u32 offset; +}; + +struct pmu_cmdline_args_gk20a { + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + u8 secure_mode; + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ +}; + +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 16K */ +#define GK20A_PMU_DMEM_BLKSIZE2 8 + +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 + +struct pmu_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset;
+ u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; /* Offset from appStartOffset */ +/* Exact size of the resident code + * ( potentially contains CRC inside at the end ) */ + u32 app_resident_code_size; + u32 app_resident_data_offset; /* Offset from appStartOffset */ +/* Exact size of the resident data + * ( potentially contains CRC inside at the end ) */ + u32 app_resident_data_size; + u32 nb_overlays; + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; + u32 compressed; +}; + +#define PMU_UNIT_REWIND (0x00) +#define PMU_UNIT_PG (0x03) +#define PMU_UNIT_INIT (0x07) +#define PMU_UNIT_PERFMON (0x12) +#define PMU_UNIT_THERM (0x1B) +#define PMU_UNIT_RC (0x1F) +#define PMU_UNIT_NULL (0x20) +#define PMU_UNIT_END (0x23) + +#define PMU_UNIT_TEST_START (0xFE) +#define PMU_UNIT_END_SIM (0xFF) +#define PMU_UNIT_TEST_END (0xFF) + +#define PMU_UNIT_ID_IS_VALID(id) \ + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) + +#define PMU_DMEM_ALLOC_ALIGNMENT (32) +#define PMU_DMEM_ALIGNMENT (4) + +#define PMU_CMD_FLAGS_PMU_MASK (0xF0) + +#define PMU_CMD_FLAGS_STATUS BIT(0) +#define PMU_CMD_FLAGS_INTR BIT(1) +#define PMU_CMD_FLAGS_EVENT BIT(2) +#define PMU_CMD_FLAGS_WATERMARK BIT(3) + +struct pmu_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) +#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr) + + +struct pmu_allocation_gk20a { + struct { + struct pmu_dmem dmem; + struct pmu_mem_gk20a fb; + } alloc; +}; + +enum { + PMU_INIT_MSG_TYPE_PMU_INIT = 0, +}; + +struct pmu_init_msg_pmu_gk20a { + u8 msg_type; + u8 pad; + u16 os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +struct pmu_init_msg { + union { + u8 msg_type; + struct pmu_init_msg_pmu_gk20a 
pmu_init_gk20a; + }; +}; + + +enum { + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, +}; + +struct pmu_rc_msg_unhandled_cmd { + u8 msg_type; + u8 unit_id; +}; + +struct pmu_rc_msg { + u8 msg_type; + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; +}; + +/* PERFMON */ +#define PMU_DOMAIN_GROUP_PSTATE 0 +#define PMU_DOMAIN_GROUP_GPC2CLK 1 +#define PMU_DOMAIN_GROUP_NUM 2 +struct pmu_perfmon_counter_gk20a { + u8 index; + u8 flags; + u8 group_id; + u8 valid; + u16 upper_threshold; /* units of 0.01% */ + u16 lower_threshold; /* units of 0.01% */ +}; +struct pmu_zbc_cmd { + u8 cmd_type; + u8 pad; + u16 entry_mask; +}; + +/* PERFMON MSG */ +enum { + PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0, + PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1, + PMU_PERFMON_MSG_ID_INIT_EVENT = 2, + PMU_PERFMON_MSG_ID_ACK = 3 +}; + +struct pmu_perfmon_msg_generic { + u8 msg_type; + u8 state_id; + u8 group_id; + u8 data; +}; + +struct pmu_perfmon_msg { + union { + u8 msg_type; + struct pmu_perfmon_msg_generic gen; + }; +}; + + +struct pmu_cmd { + struct pmu_hdr hdr; + union { + struct pmu_zbc_cmd zbc; + } cmd; +}; + +struct pmu_msg { + struct pmu_hdr hdr; + union { + struct pmu_init_msg init; + struct pmu_perfmon_msg perfmon; + struct pmu_rc_msg rc; + } msg; +}; + +/* write by sw, read by pmu, protected by sw mutex lock */ +#define PMU_COMMAND_QUEUE_HPQ 0 +/* write by sw, read by pmu, protected by sw mutex lock */ +#define PMU_COMMAND_QUEUE_LPQ 1 +/* write by pmu, read by sw, accessed by interrupt handler, no lock */ +#define PMU_MESSAGE_QUEUE 4 +#define PMU_QUEUE_COUNT 5 + +enum { + PMU_MUTEX_ID_RSVD1 = 0, + PMU_MUTEX_ID_GPUSER, + PMU_MUTEX_ID_GPMUTEX, + PMU_MUTEX_ID_I2C, + PMU_MUTEX_ID_RMLOCK, + PMU_MUTEX_ID_MSGBOX, + PMU_MUTEX_ID_FIFO, + PMU_MUTEX_ID_PG, + PMU_MUTEX_ID_GR, + PMU_MUTEX_ID_CLK, + PMU_MUTEX_ID_RSVD6, + PMU_MUTEX_ID_RSVD7, + PMU_MUTEX_ID_RSVD8, + PMU_MUTEX_ID_RSVD9, + PMU_MUTEX_ID_INVALID +}; + +#define PMU_IS_COMMAND_QUEUE(id) \ + ((id) < PMU_MESSAGE_QUEUE) + +#define PMU_IS_SW_COMMAND_QUEUE(id) \ + 
(((id) == PMU_COMMAND_QUEUE_HPQ) || \ + ((id) == PMU_COMMAND_QUEUE_LPQ)) + +#define PMU_IS_MESSAGE_QUEUE(id) \ + ((id) == PMU_MESSAGE_QUEUE) + +enum { + OFLAG_READ = 0, + OFLAG_WRITE +}; + +#define QUEUE_SET (true) + /*todo find how to get cpu_pa*/ +#define QUEUE_GET (false) + +#define QUEUE_ALIGNMENT (4) + +#define PMU_PGENG_GR_BUFFER_IDX_INIT (0) +#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1) +#define PMU_PGENG_GR_BUFFER_IDX_FECS (2) + +enum { + PMU_DMAIDX_UCODE = 0, + PMU_DMAIDX_VIRT = 1, + PMU_DMAIDX_PHYS_VID = 2, + PMU_DMAIDX_PHYS_SYS_COH = 3, + PMU_DMAIDX_PHYS_SYS_NCOH = 4, + PMU_DMAIDX_RSVD = 5, + PMU_DMAIDX_PELPG = 6, + PMU_DMAIDX_END = 7 +}; + +#define PMU_MUTEX_ID_IS_VALID(id) \ + ((id) < PMU_MUTEX_ID_INVALID) + +#define PMU_INVALID_MUTEX_OWNER_ID (0) + +struct pmu_mutex { + u32 id; + u32 index; + u32 ref_cnt; +}; + + +#define PMU_INVALID_SEQ_DESC (~0) + +enum { + PMU_SEQ_STATE_FREE = 0, + PMU_SEQ_STATE_PENDING, + PMU_SEQ_STATE_USED, + PMU_SEQ_STATE_CANCELLED +}; + +struct pmu_payload { + struct { + void *buf; + u32 offset; + u32 size; + } in, out; +}; + +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *, +u32, u32); + +struct pmu_sequence { + u8 id; + u32 state; + u32 desc; + struct pmu_msg *msg; + struct pmu_allocation_gk20a in_gk20a; + struct pmu_allocation_gk20a out_gk20a; + u8 *out_payload; + pmu_callback callback; + void *cb_params; +}; +struct pmu_gk20a_data { + struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a; + u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; +}; + +#endif /*_GK20A_H__*/ diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h index 998410563bfd..c4686e418582 100644 --- a/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -2,7 +2,91 @@ #define __NVKM_PMU_PRIV_H__ #include <subdev/pmu.h> #include <subdev/pmu/fuc/os.h> +#include <core/object.h> +#include <core/device.h> +#include <core/parent.h> +#include <core/mm.h> +#include <linux/rwsem.h> +#include 
<linux/slab.h> +#include <subdev/mmu.h> +#include <core/gpuobj.h> +static inline u32 u64_hi32(u64 n) +{ + return (u32)((n >> 32) & ~(u32)0); +} + +static inline u32 u64_lo32(u64 n) +{ + return (u32)(n & ~(u32)0); +} + +/* #define ALLOCATOR_DEBUG */ + +/* main struct */ +struct nvkm_pmu_allocator { + + char name[32]; /* name for allocator */ +/*struct rb_root rb_root;*/ /* rb tree root for blocks */ + + u32 base; /* min value of this linear space */ + u32 limit; /* max value = limit - 1 */ + + unsigned long *bitmap; /* bitmap */ + + struct gk20a_alloc_block *block_first; /* first block in list */ + struct gk20a_alloc_block *block_recent; /* last visited block */ + + u32 first_free_addr; /* first free addr, non-contigous + allocation preferred start, + in order to pick up small holes */ + u32 last_free_addr; /* last free addr, contiguous + allocation preferred start */ + u32 cached_hole_size; /* max free hole size up to + last_free_addr */ + u32 block_count; /* number of blocks */ + + struct rw_semaphore rw_sema; /* lock */ + struct kmem_cache *block_cache; /* slab cache */ + + /* if enabled, constrain to [base, limit) */ + struct { + bool enable; + u32 base; + u32 limit; + } constraint; + + int (*alloc)(struct nvkm_pmu_allocator *allocator, + u32 *addr, u32 len, u32 align); + int (*free)(struct nvkm_pmu_allocator *allocator, + u32 addr, u32 len, u32 align); + +}; + +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, + const char *name, u32 base, u32 size); +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator); + +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, + u32 *addr, u32 len, u32 align); + +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, + u32 addr, u32 len, u32 align); + +#if defined(ALLOCATOR_DEBUG) + +#define allocator_dbg(alloctor, format, arg...) 
\ +do { \ + if (1) \ + pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\ + alloctor->name, __func__, ##arg);\ +} while (0) + +#else /* ALLOCATOR_DEBUG */ + +#define allocator_dbg(format, arg...) + +#endif /* ALLOCATOR_DEBUG */ #define nvkm_pmu_create(p, e, o, d) \ nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) #define nvkm_pmu_destroy(p) \ @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *, int _nvkm_pmu_init(struct nvkm_object *); int _nvkm_pmu_fini(struct nvkm_object *, bool); void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable); +#define PMU_PG_IDLE_THRESHOLD 15000 +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 + +/* state transition : + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF + ON => OFF is always synchronized */ +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */ +#define PMU_ELPG_STAT_ON 1 /* elpg is on */ +/* elpg is off, ALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_ON_PENDING 2 +/* elpg is on, DISALLOW cmd has been sent, wait for ack */ +#define PMU_ELPG_STAT_OFF_PENDING 3 +/* elpg is off, caller has requested on, but ALLOW +cmd hasn't been sent due to ENABLE_ALLOW delay */ +#define PMU_ELPG_STAT_OFF_ON_PENDING 4 + +/* Falcon Register index */ +#define PMU_FALCON_REG_R0 (0) +#define PMU_FALCON_REG_R1 (1) +#define PMU_FALCON_REG_R2 (2) +#define PMU_FALCON_REG_R3 (3) +#define PMU_FALCON_REG_R4 (4) +#define PMU_FALCON_REG_R5 (5) +#define PMU_FALCON_REG_R6 (6) +#define PMU_FALCON_REG_R7 (7) +#define PMU_FALCON_REG_R8 (8) +#define PMU_FALCON_REG_R9 (9) +#define PMU_FALCON_REG_R10 (10) +#define PMU_FALCON_REG_R11 (11) +#define PMU_FALCON_REG_R12 (12) +#define PMU_FALCON_REG_R13 (13) +#define PMU_FALCON_REG_R14 (14) +#define PMU_FALCON_REG_R15 (15) +#define PMU_FALCON_REG_IV0 (16) +#define PMU_FALCON_REG_IV1 (17) +#define PMU_FALCON_REG_UNDEFINED (18) +#define PMU_FALCON_REG_EV (19) +#define PMU_FALCON_REG_SP (20) +#define PMU_FALCON_REG_PC (21) +#define PMU_FALCON_REG_IMB (22) 
+#define PMU_FALCON_REG_DMB (23) +#define PMU_FALCON_REG_CSW (24) +#define PMU_FALCON_REG_CCR (25) +#define PMU_FALCON_REG_SEC (26) +#define PMU_FALCON_REG_CTX (27) +#define PMU_FALCON_REG_EXCI (28) +#define PMU_FALCON_REG_RSVD0 (29) +#define PMU_FALCON_REG_RSVD1 (30) +#define PMU_FALCON_REG_RSVD2 (31) +#define PMU_FALCON_REG_SIZE (32) + +/* Choices for pmu_state */ +#define PMU_STATE_OFF 0 /* PMU is off */ +#define PMU_STATE_STARTING 1 /* PMU is on, but not booted */ +#define PMU_STATE_INIT_RECEIVED 2 /* PMU init message received */ +#define PMU_STATE_ELPG_BOOTING 3 /* PMU is booting */ +#define PMU_STATE_ELPG_BOOTED 4 /* ELPG is initialized */ +#define PMU_STATE_LOADING_PG_BUF 5 /* Loading PG buf */ +#define PMU_STATE_LOADING_ZBC 6 /* Loading ZBC buf */ +#define PMU_STATE_STARTED 7 /* Fully unitialized */ + +#define PMU_QUEUE_COUNT 5 + +#define PMU_MAX_NUM_SEQUENCES (256) +#define PMU_SEQ_BIT_SHIFT (5) +#define PMU_SEQ_TBL_SIZE \ + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) + +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 +#define PMU_SHA1_GID_SIGNATURE_SIZE 4 + +#define PMU_SHA1_GID_SIZE 16 + +struct pmu_queue { + + /* used by hw, for BIOS/SMI queue */ + u32 mutex_id; + u32 mutex_lock; + /* used by sw, for LPQ/HPQ queue */ + struct mutex mutex; + + /* current write position */ + u32 position; + /* physical dmem offset where this queue begins */ + u32 offset; + /* logical queue identifier */ + u32 id; + /* physical queue index */ + u32 index; + /* in bytes */ + u32 size; + + /* open-flag */ + u32 oflag; + bool opened; /* opened implies locked */ +}; + +struct pmu_sha1_gid { + bool valid; + u8 gid[PMU_SHA1_GID_SIZE]; +}; + +struct pmu_sha1_gid_data { + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; + u8 gid[PMU_SHA1_GID_SIZE]; +}; + +struct pmu_desc { + + struct pmu_ucode_desc *desc; + struct pmu_buf_desc ucode; + + struct pmu_buf_desc pg_buf; + /* TBD: remove this if ZBC seq is fixed */ + struct pmu_buf_desc seq_buf; + struct pmu_buf_desc trace_buf; + bool buf_loaded; 
+ + struct pmu_sha1_gid gid_info; + + struct pmu_queue queue[PMU_QUEUE_COUNT]; + + struct pmu_sequence *seq; + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE]; + u32 next_seq_desc; + + struct pmu_mutex *mutex; + u32 mutex_cnt; + + struct mutex pmu_copy_lock; + struct mutex pmu_seq_lock; + + struct nvkm_pmu_allocator dmem; + + u32 *ucode_image; + bool pmu_ready; + + u32 zbc_save_done; + + u32 stat_dmem_offset; + + u32 elpg_stat; + + int pmu_state; + +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ + struct work_struct isr_workq; + struct mutex elpg_mutex; /* protect elpg enable/disable */ +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ + int elpg_refcnt; + + bool initialized; + + void (*remove_support)(struct pmu_desc *pmu); + bool sw_ready; + bool perfmon_ready; + + u32 sample_buffer; + u32 load_shadow; + u32 load_avg; + + struct mutex isr_mutex; + bool isr_enabled; + + bool zbc_ready; + unsigned long perfmon_events_cnt; + bool perfmon_sampling_enabled; + u8 pmu_mode; + u32 falcon_id; + u32 aelpg_param[5]; + void *pmu_chip_data; + struct nvkm_pmu *pmu; +}; struct nvkm_pmu_impl { struct nvkm_oclass base; @@ -39,5 +296,12 @@ struct nvkm_pmu_impl { } data; void (*pgob)(struct nvkm_pmu *, bool); + struct pmu_desc pmudata; }; + +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu) +{ + return pmu->pmu; +} + #endif -- 1.9.1
Hi Deepak, There's... a lot of stuff going on here. Can you describe the goal of this patch (which could then be used as the patch commit message)? The current one basically boils down to "Add support for loading PMU", but merely loading the fw into a fuc engine is just a handful of lines of code. Also, except in rare cases, it's customary to split up patches of this size into smaller, more reviewable chunks, which add on bits of functionality as they go. From what I can tell, you're adding the kernel-side interface for a hypothetical (and presumably closed-source) PMU blob that NVIDIA will supply. In essence, the blob is expected to implement an RTOS which runs on the PMU's falcon CPU. There are a bunch of APIs implemented by this blob that the host can call, but it also does things on its own. For the kernel side, each of these API calls should probably be a separate patch (after an initial "just load it and do nothing" style patch). Or perhaps have the infrastructure that you add first and then something that implements the API calls. Cheers, -ilia On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal at nvidia.com> wrote: > It adds PMU boot support.It loads PMU > firmware into PMU falcon.RM/Kernel driver > receives INIT ack (through interrupt mechanism) > from PMU when PMU boots with success. 
> > Signed-off-by: Deepak Goyal <dgoyal at nvidia.com> > --- > drm/nouveau/include/nvkm/subdev/pmu.h | 26 +- > drm/nouveau/nvkm/subdev/pmu/base.c | 108 ++ > drm/nouveau/nvkm/subdev/pmu/gk20a.c | 2131 ++++++++++++++++++++++++++++++++- > drm/nouveau/nvkm/subdev/pmu/gk20a.h | 369 ++++++ > drm/nouveau/nvkm/subdev/pmu/priv.h | 264 ++++ > 5 files changed, 2884 insertions(+), 14 deletions(-) > create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h > > diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h > index 7b86acc634a0..659b4e0ba02b 100644 > --- a/drm/nouveau/include/nvkm/subdev/pmu.h > +++ b/drm/nouveau/include/nvkm/subdev/pmu.h > @@ -1,7 +1,20 @@ > #ifndef __NVKM_PMU_H__ > #define __NVKM_PMU_H__ > #include <core/subdev.h> > +#include <core/device.h> > +#include <subdev/mmu.h> > +#include <linux/debugfs.h> > > +struct pmu_buf_desc { > + struct nvkm_gpuobj *pmubufobj; > + struct nvkm_vma pmubufvma; > + size_t size; > +}; > +struct pmu_priv_vm { > + struct nvkm_gpuobj *mem; > + struct nvkm_gpuobj *pgd; > + struct nvkm_vm *vm; > +}; > struct nvkm_pmu { > struct nvkm_subdev base; > > @@ -20,9 +33,20 @@ struct nvkm_pmu { > u32 message; > u32 data[2]; > } recv; > - > + wait_queue_head_t init_wq; > + bool gr_initialised; > + struct dentry *debugfs; > + struct pmu_buf_desc *pg_buf; > + struct pmu_priv_vm *pmuvm; > int (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); > void (*pgob)(struct nvkm_pmu *, bool); > + int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token); > + int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token); > + int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load); > + int (*pmu_load_update)(struct nvkm_pmu *pmu); > + void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu); > + void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles, > + u32 *total_cycles); > }; > > static inline struct nvkm_pmu * > diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c 
b/drm/nouveau/nvkm/subdev/pmu/base.c > index 054b2d2eec35..6afd389b9764 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/base.c > +++ b/drm/nouveau/nvkm/subdev/pmu/base.c > @@ -25,6 +25,114 @@ > > #include <subdev/timer.h> > > +/* init allocator struct */ > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 start, u32 len) > +{ > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > + > + strncpy(allocator->name, name, 32); > + > + allocator->base = start; > + allocator->limit = start + len - 1; > + > + allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long), > + GFP_KERNEL); > + if (!allocator->bitmap) > + return -ENOMEM; > + > + allocator_dbg(allocator, "%s : base %d, limit %d", > + allocator->name, allocator->base); > + > + init_rwsem(&allocator->rw_sema); > + > + allocator->alloc = nvkm_pmu_allocator_block_alloc; > + allocator->free = nvkm_pmu_allocator_block_free; > + > + return 0; > +} > + > +/* destroy allocator, free all remaining blocks if any */ > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator) > +{ > + down_write(&allocator->rw_sema); > + > + kfree(allocator->bitmap); > + > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > +} > + > +/* > + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is > + * returned to caller in *addr. > + * > + * contiguous allocation, which allocates one block of > + * contiguous address. 
> +*/ > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align) > +{ > + unsigned long _addr; > + > + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); > + > + if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ > + *addr + len > allocator->limit || /* check addr range */ > + *addr & (align - 1) || /* check addr alignment */ > + len == 0) /* check len */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -ENOMEM; > + > + down_write(&allocator->rw_sema); > + > + _addr = bitmap_find_next_zero_area(allocator->bitmap, > + allocator->limit - allocator->base + 1, > + *addr ? (*addr - allocator->base) : 0, > + len, > + align - 1); > + if ((_addr > allocator->limit - allocator->base + 1) || > + (*addr && *addr != (_addr + allocator->base))) { > + up_write(&allocator->rw_sema); > + return -ENOMEM; > + } > + > + bitmap_set(allocator->bitmap, _addr, len); > + *addr = allocator->base + _addr; > + > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); > + > + return 0; > +} > + > +/* free all blocks between start and end */ > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align) > +{ > + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); > + > + if (addr + len > allocator->limit || /* check addr range */ > + addr < allocator->base || > + addr & (align - 1)) /* check addr alignment */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -EINVAL; > + > + down_write(&allocator->rw_sema); > + bitmap_clear(allocator->bitmap, addr - allocator->base, len); > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); > + > + return 0; > +} > + > void > nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable) > { > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > 
index a49934bbe637..0fd2530301a3 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > @@ -20,21 +20,67 @@ > * DEALINGS IN THE SOFTWARE. > */ > #include "priv.h" > +#include "gk20a.h" > +#include <core/client.h> > +#include <core/gpuobj.h> > +#include <subdev/bar.h> > +#include <subdev/fb.h> > +#include <subdev/mc.h> > +#include <subdev/timer.h> > +#include <subdev/mmu.h> > +#include <subdev/pmu.h> > +#include <engine/falcon.h> > > +#include <linux/delay.h> /* for mdelay */ > +#include <linux/firmware.h> > +#include <linux/clk.h> > +#include <linux/module.h> > +#include <linux/debugfs.h> > +#include <linux/dma-mapping.h> > +#include <linux/uaccess.h> > #include <subdev/clk.h> > #include <subdev/timer.h> > #include <subdev/volt.h> > > #define BUSY_SLOT 0 > #define CLK_SLOT 7 > +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" > + > +static int falc_trace_show(struct seq_file *s, void *data); > +static int falc_trace_open(struct inode *inode, struct file *file) > +{ > + return single_open(file, falc_trace_show, inode->i_private); > +} > +static const struct file_operations falc_trace_fops = { > + .open = falc_trace_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > +struct pmu_priv_vm pmuvm; > +const struct firmware *pmufw; > + > +static void gk20a_pmu_isr(struct nvkm_pmu *ppmu); > +static void pmu_process_message(struct work_struct *work); > + > +static int > +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw); > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw); > + > +static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw); > +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu); > +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc); > +static void gk20a_pmu_intr(struct nvkm_subdev *subdev); > > +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool 
enable); > struct gk20a_pmu_dvfs_data { > int p_load_target; > int p_load_max; > int p_smooth; > unsigned int avg_load; > }; > - > struct gk20a_pmu_priv { > struct nvkm_pmu base; > struct nvkm_alarm alarm; > @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status { > unsigned long busy; > int cur_state; > }; > - > +int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu) > +{ > + struct dentry *d; > + ppmu->debugfs = debugfs_create_dir("PMU", NULL); > + if (!ppmu->debugfs) > + goto err_out; > + nv_debug(ppmu, "PMU directory created with success\n"); > + d = debugfs_create_file( > + "falc_trace", 0644, ppmu->debugfs, ppmu, > + &falc_trace_fops); > + if (!d) > + goto err_out; > + return 0; > +err_out: > + pr_err("%s: Failed to make debugfs node\n", __func__); > + debugfs_remove_recursive(ppmu->debugfs); > + return -ENOMEM; > +} > +void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu, > + const struct firmware *pfw) > +{ > + nv_debug(ppmu, "firmware released\n"); > + release_firmware(pfw); > +} > static int > gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) > { > @@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > { > struct nvkm_pmu *pmu = (void *)object; > struct gk20a_pmu_priv *priv = (void *)pmu; > - > + nv_wr32(pmu, 0x10a014, 0x00000060); > + flush_work(&pmu->recv.work); > nvkm_timer_alarm_cancel(priv, &priv->alarm); > > return nvkm_subdev_fini(&pmu->base, suspend); > } > +static bool find_hex_in_string(char *strings, u32 *hex_pos) > +{ > + u32 i = 0, j = strlen(strings); > + for (; i < j; i++) { > + if (strings[i] == '%') > + if (strings[i + 1] == 'x' || strings[i + 1] == 'X') { > + *hex_pos = i; > + return true; > + } > + } > + *hex_pos = -1; > + return false; > +} > +static int falc_trace_show(struct seq_file *s, void *data) > +{ > + struct nvkm_pmu *ppmu = s->private; > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u32 i = 0, j = 0, k, l, m; > + char part_str[40]; > + 
u32 data1; > + char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL); > + char *trace = log_data; > + u32 *trace1 = (u32 *)log_data; > + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) { > + data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i); > + memcpy(log_data + i, (void *)(&data1), 32); > + } > + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { > + for (j = 0; j < 0x40; j++) > + if (trace1[(i / 4) + j]) > + break; > + if (j == 0x40) > + goto out; > + seq_printf(s, "Index %x: ", trace1[(i / 4)]); > + l = 0; > + m = 0; > + while (find_hex_in_string((trace+i+20+m), &k)) { > + if (k >= 40) > + break; > + strncpy(part_str, (trace+i+20+m), k); > + part_str[k] = 0; > + seq_printf(s, "%s0x%x", part_str, > + trace1[(i / 4) + 1 + l]); > + l++; > + m += k + 2; > + } > + seq_printf(s, "%s", (trace+i+20+m)); > + } > +out: > + kfree(log_data); > + return 0; > +} > > int > gk20a_pmu_init(struct nvkm_object *object) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + struct nvkm_pmu *ppmu = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu; > + struct gk20a_pmu_priv *priv; > + struct pmu_gk20a_data *gk20adata; > int ret; > > - ret = nvkm_subdev_init(&pmu->base); > + pmu = &impl->pmudata; > + > + nv_subdev(ppmu)->intr = gk20a_pmu_intr; > + > + mutex_init(&pmu->isr_mutex); > + mutex_init(&pmu->pmu_copy_lock); > + mutex_init(&pmu->pmu_seq_lock); > + > + if (pmufw == NULL) { > + ret = gk20a_pmu_load_firmware(ppmu, &pmufw); > + if (ret < 0) { > + nv_error(ppmu, "failed to load pmu fimware\n"); > + return ret; > + } > + nv_debug(ppmu, "loading firmware sucessful\n"); > + ret = gk20a_pmu_init_vm(ppmu, pmufw); > + if (ret < 0) { > + nv_error(ppmu, "failed to map pmu fw to va space\n"); > + goto init_vm_err; > + } > + } > + pmu->desc = (struct pmu_ucode_desc *)pmufw->data; > + gk20a_pmu_dump_firmware_info(ppmu, pmufw); > + > + if 
(pmu->desc->app_version != APP_VERSION_GK20A) { > + nv_error(ppmu, > + "PMU code version not supported version: %d\n", > + pmu->desc->app_version); > + ret = -EINVAL; > + goto app_ver_err; > + } > + gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL); > + if (!gk20adata) { > + ret = -ENOMEM; > + goto err; > + } > + > + pmu->pmu_chip_data = (void *)gk20adata; > + > + pmu->remove_support = gk20a_remove_pmu_support; > + > + ret = gk20a_init_pmu_setup_sw(ppmu); > if (ret) > - return ret; > + goto err; > + > + pmu->pmu_state = PMU_STATE_STARTING; > + ret = gk20a_init_pmu_setup_hw1(ppmu, pmc); > + if (ret) > + goto err; > + > + priv = (void *)ppmu; > > - pmu->pgob = nvkm_pmu_pgob; > + ret = nvkm_subdev_init(&ppmu->base); > + if (ret) > + goto err; > + > + ppmu->pgob = nvkm_pmu_pgob; > > - /* init pwr perf counter */ > - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > + /* init pmu perf counter */ > + nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > + nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > + nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > > - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); > + nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm); > +err: > +init_vm_err: > +app_ver_err: > + gk20a_pmu_release_firmware(ppmu, pmufw); > return ret; > } > > @@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { > .init = gk20a_pmu_init, > .fini = gk20a_pmu_fini, > }, > + .base.handle = NV_SUBDEV(PMU, 0xea), > + .pgob = gk20a_pmu_pgob, > }.base; > +void pmu_copy_from_dmem(struct pmu_desc *pmu, > + u32 src, u8 *dst, u32 size, u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *dst_u32 = (u32 *)dst; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (size == 0) { > + nv_error(ppmu, "size is zero\n"); > + goto out; > + } > + > + if (src & 
0x3) { > + nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src); > + goto out; > + } > + > + mutex_lock(&pmu->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = (0x3f << 2) | 0xff << 8; > + > + src &= addr_mask; > + > + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); > + > + for (i = 0; i < words; i++) { > + dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); > + nv_debug(ppmu, "0x%08x\n", dst_u32[i]); > + } > + if (bytes > 0) { > + data = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); > + nv_debug(ppmu, "0x%08x\n", data); > + > + for (i = 0; i < bytes; i++) > + dst[(words << 2) + i] = ((u8 *)&data)[i]; > + } > + mutex_unlock(&pmu->pmu_copy_lock); > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +void pmu_copy_to_dmem(struct pmu_desc *pmu, > + u32 dst, u8 *src, u32 size, u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *src_u32 = (u32 *)src; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (size == 0) { > + nv_error(ppmu, "size is zero\n"); > + goto out; > + } > + > + if (dst & 0x3) { > + nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst); > + goto out; > + } > + > + mutex_lock(&pmu->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = (0x3f << 2) | 0xff << 8; > + > + dst &= addr_mask; > + > + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); > + > + for (i = 0; i < words; i++) { > + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]); > + nv_debug(ppmu, "0x%08x\n", src_u32[i]); > + } > + if (bytes > 0) { > + data = 0; > + for (i = 0; i < bytes; i++) > + ((u8 *)&data)[i] = src[(words << 2) + i]; > + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data); > + nv_debug(ppmu, "0x%08x\n", data); > + } > + > + data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask; > + size = ALIGN(size, 4); > + if (data != dst + size) { > + nv_error(ppmu, "copy failed. 
bytes written %d, expected %d", > + data - dst, size); > + } > + mutex_unlock(&pmu->pmu_copy_lock); > +out: > + nv_debug(ppmu, "exit %s", __func__); > +} > + > +static int pmu_idle(struct nvkm_pmu *ppmu) > +{ > + unsigned long end_jiffies = jiffies + > + msecs_to_jiffies(2000); > + u32 idle_stat; > + > + /* wait for pmu idle */ > + do { > + idle_stat = nv_rd32(ppmu, 0x0010a04c); > + > + if (((idle_stat & 0x01) == 0) && > + ((idle_stat >> 1) & 0x7fff) == 0) { > + break; > + } > + > + if (time_after_eq(jiffies, end_jiffies)) { > + nv_error(ppmu, "timeout waiting pmu idle : 0x%08x", > + idle_stat); > + return -EBUSY; > + } > + usleep_range(100, 200); > + } while (1); > + > + return 0; > +} > + > +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + > + nv_wr32(pmc, 0x00000640, > + nv_rd32(pmc, 0x00000640) & > + ~0x1000000); > + nv_wr32(pmc, 0x00000644, > + nv_rd32(pmc, 0x00000644) & > + ~0x1000000); > + nv_wr32(ppmu, 0x0010a014, 0xff); > + > + if (enable) { > + nv_debug(ppmu, "enable pmu irq\n"); > + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 > + nv_wr32(ppmu, 0x0010a01c, 0xff01ff52); > + 0=disable, 1=enable*/ > + > + nv_wr32(ppmu, 0x0010a010, 0xff); > + nv_wr32(pmc, 0x00000640, > + nv_rd32(pmc, 0x00000640) | > + 0x1000000); > + nv_wr32(pmc, 0x00000644, > + nv_rd32(pmc, 0x00000644) | > + 0x1000000); > + } else { > + nv_debug(ppmu, "disable pmu irq\n"); > + } > + > +} > + > +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + u32 reg; > + > + if (enable) { > + int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT; > + /*need a spinlock?*/ > + reg = nv_rd32(pmc, 0x00000200); > + reg |= 0x2000; > + nv_wr32(pmc, 0x00000200, reg); > + nv_rd32(pmc, 0x00000200); > + do { > + u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6; > + > + if (!w) > + return 0; > + > + udelay(GK20A_IDLE_CHECK_DEFAULT); > + } while (--retries); > + > + reg = nv_rd32(pmc, 0x00000200); > + reg &= ~0x2000; > + 
nv_wr32(pmc, 0x00000200, reg); > + nv_error(ppmu, "Falcon mem scrubbing timeout\n"); > + > + goto error; > + } else { > + reg = nv_rd32(pmc, 0x00000200); > + reg &= ~0x2000; > + nv_wr32(pmc, 0x00000200, reg); > + return 0; > + } > +error: > + return -ETIMEDOUT; > +} > + > +static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + u32 pmc_enable; > + int err; > + > + if (!enable) { > + pmc_enable = nv_rd32(pmc, 0x200); > + if ((pmc_enable & 0x2000) != 0x0) { > + pmu_enable_irq(ppmu, pmc, false); > + pmu_enable_hw(ppmu, pmc, false); > + } > + } else { > + err = pmu_enable_hw(ppmu, pmc, true); > + if (err) > + return err; > + > + /* TBD: post reset */ > + > + err = pmu_idle(ppmu); > + if (err) > + return err; > + > + pmu_enable_irq(ppmu, pmc, true); > + } > + > + return 0; > +} > + > +int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + int err; > + > + err = pmu_idle(ppmu); > + if (err) > + return err; > + > + /* TBD: release pmu hw mutex */ > + > + err = pmu_enable(ppmu, pmc, false); > + if (err) > + return err; > + > + err = pmu_enable(ppmu, pmc, true); > + if (err) > + return err; > + > + return 0; > +} > + > +static int pmu_bootstrap(struct pmu_desc *pmu) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_ucode_desc *desc = pmu->desc; > + u64 addr_code, addr_data, addr_load; > + u32 i, blocks, addr_args; > + u32 *adr_data, *adr_load, *adr_code; > + struct pmu_cmdline_args_gk20a cmdline_args; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + > + nv_wr32(ppmu, 0x0010a048, > + nv_rd32(ppmu, 0x0010a048) | 0x01); > + /*bind the address*/ > + nv_wr32(ppmu, 0x0010a480, > + ppmuvm->mem->addr >> 12 | > + 0x1 << 30 | > + 0x20000000); > + > + /* TBD: load all other surfaces */ > + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; > + cmdline_args.falc_trace_dma_base > + u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8); > + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; > 
+ cmdline_args.cpu_freq_hz = 204; > + cmdline_args.secure_mode = 0; > + > + addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff; > + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; > + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); > + nv_debug(ppmu, "initiating copy to dmem\n"); > + pmu_copy_to_dmem(pmu, addr_args, > + (u8 *)&cmdline_args, > + sizeof(struct pmu_cmdline_args_gk20a), 0); > + > + nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24); > + > + > + addr_code = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->app_start_offset + > + desc->app_resident_code_offset) >> 8); > + > + addr_data = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->app_start_offset + > + desc->app_resident_data_offset) >> 8); > + > + addr_load = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->bootloader_start_offset) >> 8); > + > + adr_code = (u32 *) (&addr_code); > + adr_load = (u32 *) (&addr_load); > + adr_data = (u32 *) (&addr_data); > + nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); > + nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); > + nv_debug(ppmu, "0x%08x\n", *(adr_code)); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_size); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry); > + nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_data)); > + nv_debug(ppmu, "0x%08x\n", *(adr_data)); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); > + nv_debug(ppmu, "0x%08x\n", *(adr_code)); > + nv_wr32(ppmu, 0x0010a1c4, 0x1); > + nv_debug(ppmu, "0x%08x\n", 1); > + nv_wr32(ppmu, 0x0010a1c4, addr_args); > + nv_debug(ppmu, "0x%08x\n", addr_args); > + > + > + nv_wr32(ppmu, 0x0010a110, > + *(adr_load) - (desc->bootloader_imem_offset >> 
8)); > + > + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; > + > + for (i = 0; i < blocks; i++) { > + nv_wr32(ppmu, 0x0010a114, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(ppmu, 0x0010a11c, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(ppmu, 0x0010a118, > + 0x01 << 4 | > + 0x06 << 8 | > + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); > + } > + > + > + nv_wr32(ppmu, 0x0010a104, > + (0xffffffff & desc->bootloader_entry_point)); > + > + nv_wr32(ppmu, 0x0010a100, 0x1 << 1); > + > + nv_wr32(ppmu, 0x0010a080, desc->app_version); > + > + return 0; > +} > + > +void pmu_seq_init(struct pmu_desc *pmu) > +{ > + u32 i; > + > + memset(pmu->seq, 0, > + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); > + memset(pmu->pmu_seq_tbl, 0, > + sizeof(pmu->pmu_seq_tbl)); > + > + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) > + pmu->seq[i].id = i; > +} > + > +static int pmu_seq_acquire(struct pmu_desc *pmu, > + struct pmu_sequence **pseq) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_sequence *seq; > + u32 index; > + > + mutex_lock(&pmu->pmu_seq_lock); > + index = find_first_zero_bit(pmu->pmu_seq_tbl, > + sizeof(pmu->pmu_seq_tbl)); > + if (index >= sizeof(pmu->pmu_seq_tbl)) { > + nv_error(ppmu, > + "no free sequence available"); > + mutex_unlock(&pmu->pmu_seq_lock); > + return -EAGAIN; > + } > + set_bit(index, pmu->pmu_seq_tbl); > + mutex_unlock(&pmu->pmu_seq_lock); > + > + seq = &pmu->seq[index]; > + seq->state = PMU_SEQ_STATE_PENDING; > + > + *pseq = seq; > + return 0; > +} > + > +static void pmu_seq_release(struct pmu_desc *pmu, > + struct pmu_sequence *seq) > +{ > + seq->state = PMU_SEQ_STATE_FREE; > + seq->desc = PMU_INVALID_SEQ_DESC; > + seq->callback = NULL; > + seq->cb_params = NULL; > + seq->msg = NULL; > + seq->out_payload = NULL; > + seq->in_gk20a.alloc.dmem.size = 0; > + seq->out_gk20a.alloc.dmem.size = 0; > + clear_bit(seq->id, pmu->pmu_seq_tbl); > +} > + > +static int 
pmu_queue_init(struct pmu_desc *pmu, > + u32 id, struct pmu_init_msg_pmu_gk20a *init) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue = &pmu->queue[id]; > + > + queue->id = id; > + queue->index = init->queue_info[id].index; > + queue->offset = init->queue_info[id].offset; > + queue->size = init->queue_info[id].size; > + queue->mutex_id = id; > + mutex_init(&queue->mutex); > + > + nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x", > + id, queue->index, queue->offset, queue->size); > + > + return 0; > +} > + > +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue, > + u32 *head, bool set) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + BUG_ON(!head); > + > + if (PMU_IS_COMMAND_QUEUE(queue->id)) { > + > + if (queue->index >= 0x00000004) > + return -EINVAL; > + > + if (!set) > + *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) & > + 0xffffffff; > + else > + nv_wr32(ppmu, > + (0x0010a4a0 + (queue->index * 4)), > + (*head & 0xffffffff)); > + } else { > + if (!set) > + *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff; > + else > + nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff)); > + } > + > + return 0; > +} > + > +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue, > + u32 *tail, bool set) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + BUG_ON(!tail); > + > + if (PMU_IS_COMMAND_QUEUE(queue->id)) { > + > + if (queue->index >= 0x00000004) > + return -EINVAL; > + > + if (!set) > + *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) & > + 0xffffffff; > + else > + nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)), > + (*tail & 0xffffffff)); > + } else { > + if (!set) > + *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; > + else > + nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff)); > + } > + > + return 0; > +} > + > +static inline void pmu_queue_read(struct pmu_desc *pmu, 
> + u32 offset, u8 *dst, u32 size) > +{ > + pmu_copy_from_dmem(pmu, offset, dst, size, 0); > +} > + > +static inline void pmu_queue_write(struct pmu_desc *pmu, > + u32 offset, u8 *src, u32 size) > +{ > + pmu_copy_to_dmem(pmu, offset, src, size, 0); > +} > + > +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_mutex *mutex; > + u32 data, owner, max_retry; > + > + if (!pmu->initialized) > + return -EINVAL; > + > + BUG_ON(!token); > + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); > + BUG_ON(id > pmu->mutex_cnt); > + > + mutex = &pmu->mutex[id]; > + > + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; > + > + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { > + BUG_ON(mutex->ref_cnt == 0); > + nv_debug(ppmu, "already acquired by owner : 0x%08x", *token); > + mutex->ref_cnt++; > + return 0; > + } > + > + max_retry = 40; > + do { > + data = nv_rd32(ppmu, 0x0010a488) & 0xff; > + if (data == 0x00000000 || > + data == 0x000000ff) { > + nv_warn(ppmu, > + "fail to generate mutex token: val 0x%08x", > + owner); > + usleep_range(20, 40); > + continue; > + } > + > + owner = data; > + nv_wr32(ppmu, (0x0010a580 + mutex->index * 4), > + owner & 0xff); > + > + data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)); > + > + if (owner == data) { > + mutex->ref_cnt = 1; > + nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x", > + mutex->index, *token); > + *token = owner; > + goto out; > + } else { > + nv_debug(ppmu, "fail to acquire mutex idx=0x%08x", > + mutex->index); > + > + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); > + > + usleep_range(20, 40); > + continue; > + } > + } while (max_retry-- > 0); > + > + return -EBUSY; > +out: > + return 0; > +} > + > +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + 
struct pmu_mutex *mutex; > + u32 owner; > + > + if (!pmu->initialized) > + return -EINVAL; > + > + BUG_ON(!token); > + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); > + BUG_ON(id > pmu->mutex_cnt); > + > + mutex = &pmu->mutex[id]; > + > + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; > + > + if (*token != owner) { > + nv_error(ppmu, > + "requester 0x%08x NOT match owner 0x%08x", > + *token, owner); > + return -EINVAL; > + } > + > + if (--mutex->ref_cnt > 0) > + return -EBUSY; > + > + nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00); > + > + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); > + > + nv_debug(ppmu, "mutex released: id=%d, token=0x%x", > + mutex->index, *token); > + > + return 0; > +} > + > +static int pmu_queue_lock(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int ret; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (PMU_IS_MESSAGE_QUEUE(queue->id)) > + return 0; > + > + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { > + mutex_lock(&queue->mutex); > + return 0; > + } > + > + ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock); > + return ret; > +} > + > +static int pmu_queue_unlock(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int ret; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (PMU_IS_MESSAGE_QUEUE(queue->id)) > + return 0; > + > + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { > + mutex_unlock(&queue->mutex); > + return 0; > + } > + > + ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock); > + return ret; > +} > + > +/* called by pmu_read_message, no lock */ > +static bool pmu_queue_is_empty(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + u32 head, tail; > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + if (queue->opened && queue->oflag == OFLAG_READ) > + tail = queue->position; > + else > + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); > + > + return head == tail; > +} > + > +static 
bool pmu_queue_has_room(struct pmu_desc *pmu, > + struct pmu_queue *queue, u32 size, bool *need_rewind) > +{ > + u32 head, tail, free; > + bool rewind = false; > + > + size = ALIGN(size, QUEUE_ALIGNMENT); > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); > + > + if (head >= tail) { > + free = queue->offset + queue->size - head; > + free -= PMU_CMD_HDR_SIZE; > + > + if (size > free) { > + rewind = true; > + head = queue->offset; > + } > + } > + > + if (head < tail) > + free = tail - head - 1; > + > + if (need_rewind) > + *need_rewind = rewind; > + > + return size <= free; > +} > + > +static int pmu_queue_push(struct pmu_desc *pmu, > + struct pmu_queue *queue, void *data, u32 size) > +{ > + > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + if (!queue->opened && queue->oflag == OFLAG_WRITE) { > + nv_error(ppmu, "queue not opened for write\n"); > + return -EINVAL; > + } > + > + pmu_queue_write(pmu, queue->position, data, size); > + queue->position += ALIGN(size, QUEUE_ALIGNMENT); > + return 0; > +} > + > +static int pmu_queue_pop(struct pmu_desc *pmu, > + struct pmu_queue *queue, void *data, u32 size, > + u32 *bytes_read) > +{ > + u32 head, tail, used; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + *bytes_read = 0; > + > + if (!queue->opened && queue->oflag == OFLAG_READ) { > + nv_error(ppmu, "queue not opened for read\n"); > + return -EINVAL; > + } > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + tail = queue->position; > + > + if (head == tail) > + return 0; > + > + if (head > tail) > + used = head - tail; > + else > + used = queue->offset + queue->size - tail; > + > + if (size > used) { > + nv_warn(ppmu, "queue size smaller than request read\n"); > + size = used; > + } > + > + pmu_queue_read(pmu, tail, data, size); > + queue->position += ALIGN(size, QUEUE_ALIGNMENT); > + *bytes_read = size; > + return 0; > +} > + > +static void 
pmu_queue_rewind(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + struct pmu_cmd cmd; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + > + if (!queue->opened) { > + nv_error(ppmu, "queue not opened\n"); > + goto out; > + } > + > + if (queue->oflag == OFLAG_WRITE) { > + cmd.hdr.unit_id = PMU_UNIT_REWIND; > + cmd.hdr.size = PMU_CMD_HDR_SIZE; > + pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); > + nv_debug(ppmu, "queue %d rewinded\n", queue->id); > + } > + > + queue->position = queue->offset; > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +/* open for read and lock the queue */ > +static int pmu_queue_open_read(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int err; > + > + err = pmu_queue_lock(pmu, queue); > + if (err) > + return err; > + > + if (queue->opened) > + BUG(); > + > + pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); > + queue->oflag = OFLAG_READ; > + queue->opened = true; > + > + return 0; > +} > + > +/* open for write and lock the queue > + make sure there's enough free space for the write */ > +static int pmu_queue_open_write(struct pmu_desc *pmu, > + struct pmu_queue *queue, u32 size) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + bool rewind = false; > + int err; > + > + err = pmu_queue_lock(pmu, queue); > + if (err) > + return err; > + > + if (queue->opened) > + BUG(); > + > + if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { > + nv_error(ppmu, "queue full"); > + pmu_queue_unlock(pmu, queue); > + return -EAGAIN; > + } > + > + pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); > + queue->oflag = OFLAG_WRITE; > + queue->opened = true; > + > + if (rewind) > + pmu_queue_rewind(pmu, queue); > + > + return 0; > +} > + > +/* close and unlock the queue */ > +static int pmu_queue_close(struct pmu_desc *pmu, > + struct pmu_queue *queue, bool commit) > +{ > + if (!queue->opened) > + return 0; > + > + if (commit) { > + if 
(queue->oflag == OFLAG_READ) { > + pmu_queue_tail(pmu, queue, > + &queue->position, QUEUE_SET); > + } else { > + pmu_queue_head(pmu, queue, > + &queue->position, QUEUE_SET); > + } > + } > + > + queue->opened = false; > + > + pmu_queue_unlock(pmu, queue); > + > + return 0; > +} > + > +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, > + u32 *var, u32 val) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); > + unsigned long delay = GK20A_IDLE_CHECK_DEFAULT; > + > + do { > + if (*var == val) > + return 0; > + > + if (nv_rd32(ppmu, 0x0010a008)) > + gk20a_pmu_isr(ppmu); > + > + usleep_range(delay, delay * 2); > + delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX); > + } while (time_before(jiffies, end_jiffies)); > + > + return -ETIMEDOUT; > +} > + > +void pmu_dump_falcon_stats(struct pmu_desc *pmu) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + int i; > + > + nv_debug(ppmu, "pmu_falcon_os_r : %d\n", > + nv_rd32(ppmu, 0x0010a080)); > + nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a100)); > + nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a04c)); > + nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a040)); > + nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a044)); > + nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a008)); > + nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a00c)); > + nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a018)); > + nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a01c)); > + > + for (i = 0; i < 0x0000000c; i++) > + nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n", > + i, nv_rd32(ppmu, 0x0010a450 + i*4)); > + > + for (i = 0; i < 0x00000004; i++) > + nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n", 
> + i, nv_rd32(ppmu, 0x0010a5c0 + i*4)); > + > + for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { > + nv_wr32(ppmu, 0x0010a200, > + 0xe | > + (i & 0x1f) << 8); > + nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n", > + i, nv_rd32(ppmu, 0x0010a20c)); > + } > + > + i = nv_rd32(ppmu, 0x0010a7b0); > + nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i); > + if (i != 0) { > + nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a0)); > + nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a4)); > + nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a8)); > + nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7ac)); > + } > + > + i = nv_rd32(ppmu, 0x0010a988); > + nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i); > + > + i = nv_rd32(ppmu, 0x0010a16c); > + nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i); > + if (((i >> 31) & 0x1)) { > + nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a168)); > + /*nv_debug(ppmu, "pmc_enable : 0x%x\n", > + nv_rd32(pmc, 0x00000200));*/ > + } > + > + nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a0a4)); > + nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a050)); > + nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a054)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_IMB & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_DMB & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_CSW & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_CTX & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_CTX 
: 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_EXCI & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + for (i = 0; i < 4; i++) { > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_PC & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_SP & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + } > + > + /* PMU may crash due to FECS crash. Dump FECS status */ > + /*gk20a_fecs_dump_falcon_stats(g);*/ > +} > + > +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, > + struct pmu_msg *msg, struct pmu_payload *payload, > + u32 queue_id) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue; > + u32 in_size, out_size; > + > + nv_debug(ppmu, "pmu validate cmd\n"); > + pmu_dump_falcon_stats(pmu); > + > + if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) > + goto invalid_cmd; > + > + queue = &pmu->queue[queue_id]; > + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) > + goto invalid_cmd; > + > + if (cmd->hdr.size > (queue->size >> 1)) > + goto invalid_cmd; > + > + if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) > + goto invalid_cmd; > + > + if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) > + goto invalid_cmd; > + > + if (payload == NULL) > + return true; > + > + if (payload->in.buf == NULL && payload->out.buf == NULL) > + goto invalid_cmd; > + > + if ((payload->in.buf != NULL && payload->in.size == 0) || > + (payload->out.buf != NULL && payload->out.size == 0)) > + goto invalid_cmd; > + > + in_size = PMU_CMD_HDR_SIZE; > + if (payload->in.buf) { > + in_size += payload->in.offset; > + in_size += sizeof(struct pmu_allocation_gk20a); > + } > + > + out_size = PMU_CMD_HDR_SIZE; > + if (payload->out.buf) { > + out_size += payload->out.offset; > + 
out_size += sizeof(struct pmu_allocation_gk20a); > + } > + > + if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) > + goto invalid_cmd; > + > + > + if ((payload->in.offset != 0 && payload->in.buf == NULL) || > + (payload->out.offset != 0 && payload->out.buf == NULL)) > + goto invalid_cmd; > + > + return true; > + > +invalid_cmd: > + nv_error(ppmu, "invalid pmu cmd :\n" > + "queue_id=%d,\n" > + "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" > + "payload in=%p, in_size=%d, in_offset=%d,\n" > + "payload out=%p, out_size=%d, out_offset=%d", > + queue_id, cmd->hdr.size, cmd->hdr.unit_id, > + msg, msg ? msg->hdr.unit_id : ~0, > + &payload->in, payload->in.size, payload->in.offset, > + &payload->out, payload->out.size, payload->out.offset); > + > + return false; > +} > + > +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, > + u32 queue_id, unsigned long timeout) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue; > + unsigned long end_jiffies = jiffies + > + msecs_to_jiffies(timeout); > + int err; > + > + nv_debug(ppmu, "pmu write cmd\n"); > + > + queue = &pmu->queue[queue_id]; > + > + do { > + err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); > + if (err == -EAGAIN && time_before(jiffies, end_jiffies)) > + usleep_range(1000, 2000); > + else > + break; > + } while (1); > + > + if (err) > + goto clean_up; > + > + pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); > + > + err = pmu_queue_close(pmu, queue, true); > + > +clean_up: > + if (err) > + nv_error(ppmu, > + "fail to write cmd to queue %d", queue_id); > + else > + nv_debug(ppmu, "cmd writing done"); > + > + return err; > +} > + > +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd, > + struct pmu_msg *msg, struct pmu_payload *payload, > + u32 queue_id, pmu_callback callback, void *cb_param, > + u32 *seq_desc, unsigned long timeout) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct 
pmu_desc *pmu = &impl->pmudata; > + struct pmu_sequence *seq; > + struct pmu_allocation_gk20a *in = NULL, *out = NULL; > + int err; > + > + BUG_ON(!cmd); > + BUG_ON(!seq_desc); > + BUG_ON(!pmu->pmu_ready); > + nv_debug(ppmu, "Post CMD\n"); > + if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) > + return -EINVAL; > + > + err = pmu_seq_acquire(pmu, &seq); > + if (err) > + return err; > + > + cmd->hdr.seq_id = seq->id; > + > + cmd->hdr.ctrl_flags = 0; > + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; > + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; > + > + seq->callback = callback; > + seq->cb_params = cb_param; > + seq->msg = msg; > + seq->out_payload = NULL; > + seq->desc = pmu->next_seq_desc++; > + > + if (payload) > + seq->out_payload = payload->out.buf; > + > + *seq_desc = seq->desc; > + > + if (payload && payload->in.offset != 0) { > + in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + > + payload->in.offset); > + > + if (payload->in.buf != payload->out.buf) > + in->alloc.dmem.size = (u16)payload->in.size; > + else > + in->alloc.dmem.size > + (u16)max(payload->in.size, payload->out.size); > + > + err = pmu->dmem.alloc(&pmu->dmem, > + (void *)&in->alloc.dmem.offset, > + in->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (err) > + goto clean_up; > + > + pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset), > + payload->in.buf, payload->in.size, 0); > + seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size; > + seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset; > + } > + > + if (payload && payload->out.offset != 0) { > + out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + > + payload->out.offset); > + out->alloc.dmem.size = (u16)payload->out.size; > + > + if (payload->out.buf != payload->in.buf) { > + err = pmu->dmem.alloc(&pmu->dmem, > + (void *)&out->alloc.dmem.offset, > + out->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (err) > + goto clean_up; > + } else { > + BUG_ON(in == NULL); > + out->alloc.dmem.offset = in->alloc.dmem.offset; > 
+ } > + > + seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size; > + seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset; > + } > + > + seq->state = PMU_SEQ_STATE_USED; > + err = pmu_write_cmd(pmu, cmd, queue_id, timeout); > + if (err) > + seq->state = PMU_SEQ_STATE_PENDING; > + > + nv_debug(ppmu, "cmd posted\n"); > + > + return 0; > + > +clean_up: > + nv_debug(ppmu, "cmd post failed\n"); > + if (in) > + pmu->dmem.free(&pmu->dmem, > + in->alloc.dmem.offset, > + in->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (out) > + pmu->dmem.free(&pmu->dmem, > + out->alloc.dmem.offset, > + out->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + > + pmu_seq_release(pmu, seq); > + return err; > +} > + > +void gk20a_pmu_isr(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct nvkm_mc *pmc = nvkm_mc(ppmu); > + struct pmu_queue *queue; > + u32 intr, mask; > + bool recheck = false; > + if (!pmu->isr_enabled) > + goto out; > + > + mask = nv_rd32(ppmu, 0x0010a018) & > + nv_rd32(ppmu, 0x0010a01c); > + > + intr = nv_rd32(ppmu, 0x0010a008) & mask; > + > + nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr); > + pmu_enable_irq(ppmu, pmc, false); > + if (!intr || pmu->pmu_state == PMU_STATE_OFF) { > + nv_wr32(ppmu, 0x0010a004, intr); > + nv_error(ppmu, "pmu state off\n"); > + pmu_enable_irq(ppmu, pmc, true); > + goto out; > + } > + if (intr & 0x10) { > + nv_error(ppmu, > + "pmu halt intr not implemented"); > + pmu_dump_falcon_stats(pmu); > + } > + if (intr & 0x20) { > + nv_error(ppmu, > + "pmu exterr intr not implemented. 
Clearing interrupt."); > + pmu_dump_falcon_stats(pmu); > + > + nv_wr32(ppmu, 0x0010a16c, > + nv_rd32(ppmu, 0x0010a16c) & > + ~(0x1 << 31)); > + } > + if (intr & 0x40) { > + nv_debug(ppmu, "scheduling work\n"); > + schedule_work(&pmu->isr_workq); > + pmu_enable_irq(ppmu, pmc, true); > + recheck = true; > + } > + > + if (recheck) { > + queue = &pmu->queue[PMU_MESSAGE_QUEUE]; > + if (!pmu_queue_is_empty(pmu, queue)) > + nv_wr32(ppmu, 0x0010a000, 0x40); > + } else { > + pmu_enable_irq(ppmu, pmc, true); > + } > + > + pmu_enable_irq(ppmu, pmc, true); > + nv_wr32(ppmu, 0x0010a004, intr); > +out: > + nv_debug(ppmu, "irq handled\n"); > +} > + > +static int > +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw) > +{ > + int ret = 0; > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u32 *ucode_image; > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + int i; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + struct nvkm_device *device = nv_device(&ppmu->base); > + struct nvkm_vm *vm; > + u64 pmu_area_len = 300*1024; > + > + ppmu->pmuvm = &pmuvm; > + ppmu->pg_buf = &pmu->pg_buf; > + pmu->pmu = ppmu; > + /* mem for inst blk*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0, > + &ppmuvm->mem); > + if (ret) > + goto instblk_alloc_err; > + > + /* mem for pgd*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0, > + &ppmuvm->pgd); > + if (ret) > + goto pgd_alloc_err; > + > + /*allocate virtual memory range*/ > + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); > + if (ret) > + goto virt_alloc_err; > + > + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); > + /*update VM with pgd */ > + > + ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd); > + if (ret) > + goto virt_alloc_err; > + > + /*update pgd in inst blk */ > + nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr)); > + nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr)); > + nv_wo32(ppmuvm->mem, 0x0208, 
lower_32_bits(pmu_area_len - 1)); > + nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); > + > + /* allocate memory for pmu fw to be copied to*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, > + GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj); > + if (ret) > + goto fw_alloc_err; > + > + ucode_image = (u32 *)((u32)desc + desc->descriptor_size); > + for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) { > + nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]); > + pr_info("writing 0x%08x\n", ucode_image[i]); > + } > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm, > + NV_MEM_ACCESS_RW, > + &pmu->ucode.pmubufvma); > + if (ret) > + goto map_err; > + > + nv_debug(ppmu, "%s function end\n", __func__); > + return ret; > +map_err: > + nvkm_gpuobj_destroy(pmu->ucode.pmubufobj); > +virt_alloc_err: > +fw_alloc_err: > + nvkm_gpuobj_destroy(ppmuvm->pgd); > +pgd_alloc_err: > + nvkm_gpuobj_destroy(ppmuvm->mem); > +instblk_alloc_err: > + return ret; > + > +} > + > +static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw) > +{ > + struct nvkm_device *dev; > + char name[32]; > + > + dev = nv_device(ppmu); > + > + snprintf(name, sizeof(name), "nvidia/tegra124/%s", > + GK20A_PMU_UCODE_IMAGE); > + > + return request_firmware(pfw, name, nv_device_base(dev)); > +} > + > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, > + const struct firmware *fw) > +{ > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + > + nv_debug(ppmu, "GK20A PMU firmware information\n"); > + nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size); > + nv_debug(ppmu, "image size = %u\n", desc->image_size); > + nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version); > + nv_debug(ppmu, "date = %s\n", desc->date); > + nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n", > + desc->bootloader_start_offset); > + nv_debug(ppmu, 
"bootloader_size = 0x%08x\n", desc->bootloader_size); > + nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n", > + desc->bootloader_imem_offset); > + nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n", > + desc->bootloader_entry_point); > + nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); > + nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size); > + nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); > + nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); > + nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); > + nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n", > + desc->app_resident_code_offset); > + nv_debug(ppmu, "app_resident_code_size = 0x%08x\n", > + desc->app_resident_code_size); > + nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n", > + desc->app_resident_data_offset); > + nv_debug(ppmu, "app_resident_data_size = 0x%08x\n", > + desc->app_resident_data_size); > + nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays); > + > + nv_debug(ppmu, "compressed = %u\n", desc->compressed); > +} > + > +static int pmu_process_init_msg(struct pmu_desc *pmu, > + struct pmu_msg *msg) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_init_msg_pmu_gk20a *init; > + struct pmu_sha1_gid_data gid_data; > + u32 i, tail = 0; > + > + tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; > + > + pmu_copy_from_dmem(pmu, tail, > + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); > + > + if (msg->hdr.unit_id != PMU_UNIT_INIT) { > + nv_error(ppmu, > + "expecting init msg"); > + return -EINVAL; > + } > + > + pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, > + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); > + > + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { > + nv_error(ppmu, > + "expecting init msg"); > + return -EINVAL; > + } > + > + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); > + nv_wr32(ppmu, 0x0010a4cc, > + tail & 0xffffffff); > + > + init = 
&msg->msg.init.pmu_init_gk20a; > + if (!pmu->gid_info.valid) { > + > + pmu_copy_from_dmem(pmu, > + init->sw_managed_area_offset, > + (u8 *)&gid_data, > + sizeof(struct pmu_sha1_gid_data), 0); > + > + pmu->gid_info.valid > + (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); > + > + if (pmu->gid_info.valid) { > + > + BUG_ON(sizeof(pmu->gid_info.gid) !> + sizeof(gid_data.gid)); > + > + memcpy(pmu->gid_info.gid, gid_data.gid, > + sizeof(pmu->gid_info.gid)); > + } > + } > + > + for (i = 0; i < PMU_QUEUE_COUNT; i++) > + pmu_queue_init(pmu, i, init); > + > + if (!pmu->dmem.alloc) > + nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", > + init->sw_managed_area_offset, > + init->sw_managed_area_size); > + > + pmu->pmu_ready = true; > + pmu->pmu_state = PMU_STATE_INIT_RECEIVED; > + > + return 0; > +} > + > +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue, > + struct pmu_msg *msg, int *status) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + u32 read_size, bytes_read; > + int err; > + > + *status = 0; > + > + if (pmu_queue_is_empty(pmu, queue)) > + return false; > + > + err = pmu_queue_open_read(pmu, queue); > + if (err) { > + nv_error(ppmu, > + "fail to open queue %d for read", queue->id); > + *status = err; > + return false; > + } > + > + err = pmu_queue_pop(pmu, queue, &msg->hdr, > + PMU_MSG_HDR_SIZE, &bytes_read); > + if (err || bytes_read != PMU_MSG_HDR_SIZE) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err | -EINVAL; > + goto clean_up; > + } > + > + if (msg->hdr.unit_id == PMU_UNIT_REWIND) { > + pmu_queue_rewind(pmu, queue); > + /* read again after rewind */ > + err = pmu_queue_pop(pmu, queue, &msg->hdr, > + PMU_MSG_HDR_SIZE, &bytes_read); > + if (err || bytes_read != PMU_MSG_HDR_SIZE) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err | -EINVAL; > + goto clean_up; > + } > + } > + > + if 
(!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { > + nv_error(ppmu, > + "read invalid unit_id %d from queue %d", > + msg->hdr.unit_id, queue->id); > + *status = -EINVAL; > + goto clean_up; > + } > + > + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { > + read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; > + err = pmu_queue_pop(pmu, queue, &msg->msg, > + read_size, &bytes_read); > + if (err || bytes_read != read_size) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err; > + goto clean_up; > + } > + } > + > + err = pmu_queue_close(pmu, queue, true); > + if (err) { > + nv_error(ppmu, > + "fail to close queue %d", queue->id); > + *status = err; > + return false; > + } > + > + return true; > + > +clean_up: > + err = pmu_queue_close(pmu, queue, false); > + if (err) > + nv_error(ppmu, > + "fail to close queue %d", queue->id); > + return false; > +} > + > +static int pmu_response_handle(struct pmu_desc *pmu, > + struct pmu_msg *msg) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_sequence *seq; > + int ret = 0; > + > + nv_debug(ppmu, "handling pmu response\n"); > + seq = &pmu->seq[msg->hdr.seq_id]; > + if (seq->state != PMU_SEQ_STATE_USED && > + seq->state != PMU_SEQ_STATE_CANCELLED) { > + nv_error(ppmu, > + "msg for an unknown sequence %d", seq->id); > + return -EINVAL; > + } > + > + if (msg->hdr.unit_id == PMU_UNIT_RC && > + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { > + nv_error(ppmu, > + "unhandled cmd: seq %d", seq->id); > + } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { > + if (seq->msg) { > + if (seq->msg->hdr.size >= msg->hdr.size) { > + memcpy(seq->msg, msg, msg->hdr.size); > + if (seq->out_gk20a.alloc.dmem.size != 0) { > + pmu_copy_from_dmem(pmu, > + seq->out_gk20a.alloc.dmem.offset, > + seq->out_payload, > + seq->out_gk20a.alloc.dmem.size, 0); > + } > + } else { > + nv_error(ppmu, > + "sequence %d msg buffer too small", > + seq->id); > + } > + } > + } else > + seq->callback 
= NULL; > + if (seq->in_gk20a.alloc.dmem.size != 0) > + pmu->dmem.free(&pmu->dmem, > + seq->in_gk20a.alloc.dmem.offset, > + seq->in_gk20a.alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (seq->out_gk20a.alloc.dmem.size != 0) > + pmu->dmem.free(&pmu->dmem, > + seq->out_gk20a.alloc.dmem.offset, > + seq->out_gk20a.alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + > + if (seq->callback) > + seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret); > + > + pmu_seq_release(pmu, seq); > + > + /* TBD: notify client waiting for available dmem */ > + nv_debug(ppmu, "pmu response processed\n"); > + > + return 0; > +} > + > +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, > + u32 *var, u32 val); > + > + > +static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg) > +{ > + int err = 0; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + switch (msg->hdr.unit_id) { > + case PMU_UNIT_PERFMON: > + nv_debug(ppmu, "init perfmon event generated\n"); > + break; > + default: > + nv_debug(ppmu, "default event generated\n"); > + break; > + } > + > + return err; > +} > + > +void pmu_process_message(struct work_struct *work) > +{ > + struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq); > + struct pmu_msg msg; > + int status; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct nvkm_mc *pmc = nvkm_mc(ppmu); > + > + mutex_lock(&pmu->isr_mutex); > + if (unlikely(!pmu->pmu_ready)) { > + nv_debug(ppmu, "processing init msg\n"); > + pmu_process_init_msg(pmu, &msg); > + mutex_unlock(&pmu->isr_mutex); > + pmu_enable_irq(ppmu, pmc, true); > + goto out; > + } > + > + while (pmu_read_message(pmu, > + &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { > + > + nv_debug(ppmu, "read msg hdr:\n" > + "unit_id = 0x%08x, size = 0x%08x,\n" > + "ctrl_flags = 0x%08x, seq_id = 0x%08x\n", > + msg.hdr.unit_id, msg.hdr.size, > + msg.hdr.ctrl_flags, msg.hdr.seq_id); > + > + msg.hdr.ctrl_flags &= 
~PMU_CMD_FLAGS_PMU_MASK; > + > + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) > + pmu_handle_event(pmu, &msg); > + else > + pmu_response_handle(pmu, &msg); > + } > + mutex_unlock(&pmu->isr_mutex); > + pmu_enable_irq(ppmu, pmc, true); > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + > + /* make sure the pending operations are finished before we continue */ > + cancel_work_sync(&pmu->isr_workq); > + pmu->initialized = false; > + > + mutex_lock(&pmu->isr_mutex); > + pmu_enable(ppmu, pmc, false); > + pmu->isr_enabled = false; > + mutex_unlock(&pmu->isr_mutex); > + > + pmu->pmu_state = PMU_STATE_OFF; > + pmu->pmu_ready = false; > + pmu->zbc_ready = false; > + > + return 0; > +} > + > +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + *load = pmu->load_shadow; > + return 0; > +} > + > +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u16 _load = 0; > + > + pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); > + pmu->load_shadow = _load / 10; > + pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); > + > + return 0; > +} > + > +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles, > + u32 *total_cycles) > +{ > + /*todo if (!g->power_on || gk20a_busy(g->dev)) { > + *busy_cycles = 0; > + *total_cycles = 0; > + return; > + }*/ > + > + *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff; > + /*todormb();*/ > + *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff; > + /*todogk20a_idle(g->dev);*/ > +} > + > +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu) > +{ > + u32 reg_val = 1 << 31; > + > + 
/*todoif (!g->power_on || gk20a_busy(g->dev)) > + return;*/ > + > + nv_wr32(ppmu, 0x0010a508 + 32, reg_val); > + /*todowmb()*/; > + nv_wr32(ppmu, 0x0010a508 + 16, reg_val); > + /*todogk20a_idle(g->dev);*/ > +} > + > +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + int err; > + > + mutex_lock(&pmu->isr_mutex); > + pmu_reset(ppmu, pmc); > + pmu->isr_enabled = true; > + mutex_unlock(&pmu->isr_mutex); > + > + /* setup apertures - virtual */ > + nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0); > + nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0); > + /* setup apertures - physical */ > + nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0); > + nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1); > + nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2); > + > + /* TBD: load pmu ucode */ > + err = pmu_bootstrap(pmu); > + if (err) > + return err; > + > + return 0; > + > +} > + > +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + int i, err = 0; > + int ret = 0; > + > + > + if (pmu->sw_ready) { > + > + for (i = 0; i < pmu->mutex_cnt; i++) { > + pmu->mutex[i].id = i; > + pmu->mutex[i].index = i; > + } > + pmu_seq_init(pmu); > + > + nv_debug(ppmu, "skipping init\n"); > + goto skip_init; > + } > + > + /* no infoRom script from vbios? 
*/ > + > + /* TBD: sysmon subtask */ > + > + pmu->mutex_cnt = 0x00000010; > + pmu->mutex = kzalloc(pmu->mutex_cnt * > + sizeof(struct pmu_mutex), GFP_KERNEL); > + if (!pmu->mutex) { > + err = -ENOMEM; > + nv_error(ppmu, "not enough space ENOMEM\n"); > + goto err; > + } > + > + for (i = 0; i < pmu->mutex_cnt; i++) { > + pmu->mutex[i].id = i; > + pmu->mutex[i].index = i; > + } > + > + pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * > + sizeof(struct pmu_sequence), GFP_KERNEL); > + if (!pmu->seq) { > + err = -ENOMEM; > + nv_error(ppmu, "not enough space ENOMEM\n"); > + goto err_free_mutex; > + } > + > + pmu_seq_init(pmu); > + > + INIT_WORK(&pmu->isr_workq, pmu_process_message); > + init_waitqueue_head(&ppmu->init_wq); > + ppmu->gr_initialised = false; > + > + /* allocate memory for pmu fw area */ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE, > + 0x1000, 0, &pmu->seq_buf.pmubufobj); > + if (ret) > + return ret; > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE, > + 0, 0, &pmu->trace_buf.pmubufobj); > + if (ret) > + return ret; > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj), > + ppmuvm->vm, > + NV_MEM_ACCESS_RW, > + &pmu->seq_buf.pmubufvma); > + if (ret) > + return ret; > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj), > + ppmuvm->vm, > + NV_MEM_ACCESS_RW, > + &pmu->trace_buf.pmubufvma); > + if (ret) > + return ret; > + > + /* TBD: remove this if ZBC save/restore is handled by PMU > + * end an empty ZBC sequence for now */ > + nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16); > + nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01); > + nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00); > + > + pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; > + ret = 
gk20a_pmu_debugfs_init(ppmu); > + if (ret) > + return ret; > + > + pmu->sw_ready = true; > + > +skip_init: > + return 0; > +err_free_mutex: > + kfree(pmu->mutex); > +err: > + return err; > +} > + > +static void > +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable) > +{ > + /* > + nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000); > + nv_rd32(ppmu, 0x000200); > + nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000); > + > + msleep(50); > + > + nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000); > + nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000); > + nv_rd32(ppmu, 0x000200); > + */ > +} > + > +static void gk20a_pmu_intr(struct nvkm_subdev *subdev) > +{ > + struct nvkm_pmu *ppmu = nvkm_pmu(subdev); > + > + gk20a_pmu_isr(ppmu); > +} > + > +void gk20a_remove_pmu_support(struct pmu_desc *pmu) > +{ > + nvkm_pmu_allocator_destroy(&pmu->dmem); > +} > + > +int gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2], > + u32 process, u32 message, u32 data0, u32 data1) > +{ > + return -EPERM; > +} > + > +int > +gk20a_pmu_create_(struct nvkm_object *parent, > + struct nvkm_object *engine, > + struct nvkm_oclass *oclass, int length, void **pobject) > +{ > + struct nvkm_pmu *ppmu; > + struct nvkm_device *device = nv_device(parent); > + int ret; > + > + ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU", > + "pmu", length, pobject); > + ppmu = *pobject; > + if (ret) > + return ret; > + > + ret = nv_device_get_irq(device, true); > + > + ppmu->message = gk20a_message; > + ppmu->pgob = gk20a_pmu_pgob; > + ppmu->pmu_mutex_acquire = pmu_mutex_acquire; > + ppmu->pmu_mutex_release = pmu_mutex_release; > + ppmu->pmu_load_norm = gk20a_pmu_load_norm; > + ppmu->pmu_load_update = gk20a_pmu_load_update; > + ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters; > + ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters; > + > + return 0; > +} > + > + > + > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h > new file mode 100644 > index 
000000000000..a084d6d518b4 > --- /dev/null > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h > @@ -0,0 +1,369 @@ > +#ifndef __NVKM_pmu_GK20A_H__ > +#define __NVKM_pmu_GK20A_H__ > + > +/* > + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. 
> + */ > +void pmu_setup_hw(struct pmu_desc *pmu); > +void gk20a_remove_pmu_support(struct pmu_desc *pmu); > +#define gk20a_pmu_create(p, e, o, d) \ > + gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) > + > +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *, > + struct nvkm_oclass *, int, void **); > +/* defined by pmu hw spec */ > +#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024) > +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) > +#define GK20A_PMU_SEQ_BUF_SIZE 4096 > +/* idle timeout */ > +#define GK20A_IDLE_CHECK_DEFAULT 100 /* usec */ > +#define GK20A_IDLE_CHECK_MAX 5000 /* usec */ > + > +/* so far gk20a has two engines: gr and ce2(gr_copy) */ > +enum { > + ENGINE_GR_GK20A = 0, > + ENGINE_CE2_GK20A = 1, > + ENGINE_INVAL_GK20A > +}; > + > +#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe) > + > +#define APP_VERSION_GK20A 17997577 > + > +enum { > + GK20A_PMU_DMAIDX_UCODE = 0, > + GK20A_PMU_DMAIDX_VIRT = 1, > + GK20A_PMU_DMAIDX_PHYS_VID = 2, > + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, > + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + GK20A_PMU_DMAIDX_RSVD = 5, > + GK20A_PMU_DMAIDX_PELPG = 6, > + GK20A_PMU_DMAIDX_END = 7 > +}; > + > +struct pmu_mem_gk20a { > + u32 dma_base; > + u8 dma_offset; > + u8 dma_idx; > + u16 fb_size; > +}; > + > +struct pmu_dmem { > + u16 size; > + u32 offset; > +}; > + > +struct pmu_cmdline_args_gk20a { > + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ > + u32 falc_trace_size; /* falctrace buffer size (bytes) */ > + u32 falc_trace_dma_base; /* 256-byte block address */ > + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ > + u8 secure_mode; > + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ > +}; > + > +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ > +#define GK20A_PMU_DMEM_BLKSIZE2 8 > + > +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 > +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 > + > +struct pmu_ucode_desc { > + u32 descriptor_size; > + u32 image_size; > + u32 tools_version; > + u32 
app_version; > + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; > + u32 bootloader_start_offset; > + u32 bootloader_size; > + u32 bootloader_imem_offset; > + u32 bootloader_entry_point; > + u32 app_start_offset; > + u32 app_size; > + u32 app_imem_offset; > + u32 app_imem_entry; > + u32 app_dmem_offset; > + u32 app_resident_code_offset; /* Offset from appStartOffset */ > +/* Exact size of the resident code > + * ( potentially contains CRC inside at the end ) */ > + u32 app_resident_code_size; > + u32 app_resident_data_offset; /* Offset from appStartOffset */ > +/* Exact size of the resident data > + * ( potentially contains CRC inside at the end ) */ > + u32 app_resident_data_size; > + u32 nb_overlays; > + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; > + u32 compressed; > +}; > + > +#define PMU_UNIT_REWIND (0x00) > +#define PMU_UNIT_PG (0x03) > +#define PMU_UNIT_INIT (0x07) > +#define PMU_UNIT_PERFMON (0x12) > +#define PMU_UNIT_THERM (0x1B) > +#define PMU_UNIT_RC (0x1F) > +#define PMU_UNIT_NULL (0x20) > +#define PMU_UNIT_END (0x23) > + > +#define PMU_UNIT_TEST_START (0xFE) > +#define PMU_UNIT_END_SIM (0xFF) > +#define PMU_UNIT_TEST_END (0xFF) > + > +#define PMU_UNIT_ID_IS_VALID(id) \ > + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) > + > +#define PMU_DMEM_ALLOC_ALIGNMENT (32) > +#define PMU_DMEM_ALIGNMENT (4) > + > +#define PMU_CMD_FLAGS_PMU_MASK (0xF0) > + > +#define PMU_CMD_FLAGS_STATUS BIT(0) > +#define PMU_CMD_FLAGS_INTR BIT(1) > +#define PMU_CMD_FLAGS_EVENT BIT(2) > +#define PMU_CMD_FLAGS_WATERMARK BIT(3) > + > +struct pmu_hdr { > + u8 unit_id; > + u8 size; > + u8 ctrl_flags; > + u8 seq_id; > +}; > +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) > +#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr) > + > + > +struct pmu_allocation_gk20a { > + struct { > + struct pmu_dmem dmem; > + struct pmu_mem_gk20a fb; > + } alloc; > +}; > + > +enum { > + PMU_INIT_MSG_TYPE_PMU_INIT = 0, > +}; > + > +struct pmu_init_msg_pmu_gk20a { > + u8 
msg_type; > + u8 pad; > + u16 os_debug_entry_point; > + > + struct { > + u16 size; > + u16 offset; > + u8 index; > + u8 pad; > + } queue_info[PMU_QUEUE_COUNT]; > + > + u16 sw_managed_area_offset; > + u16 sw_managed_area_size; > +}; > + > +struct pmu_init_msg { > + union { > + u8 msg_type; > + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; > + }; > +}; > + > + > +enum { > + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, > +}; > + > +struct pmu_rc_msg_unhandled_cmd { > + u8 msg_type; > + u8 unit_id; > +}; > + > +struct pmu_rc_msg { > + u8 msg_type; > + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; > +}; > + > +/* PERFMON */ > +#define PMU_DOMAIN_GROUP_PSTATE 0 > +#define PMU_DOMAIN_GROUP_GPC2CLK 1 > +#define PMU_DOMAIN_GROUP_NUM 2 > +struct pmu_perfmon_counter_gk20a { > + u8 index; > + u8 flags; > + u8 group_id; > + u8 valid; > + u16 upper_threshold; /* units of 0.01% */ > + u16 lower_threshold; /* units of 0.01% */ > +}; > +struct pmu_zbc_cmd { > + u8 cmd_type; > + u8 pad; > + u16 entry_mask; > +}; > + > +/* PERFMON MSG */ > +enum { > + PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0, > + PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1, > + PMU_PERFMON_MSG_ID_INIT_EVENT = 2, > + PMU_PERFMON_MSG_ID_ACK = 3 > +}; > + > +struct pmu_perfmon_msg_generic { > + u8 msg_type; > + u8 state_id; > + u8 group_id; > + u8 data; > +}; > + > +struct pmu_perfmon_msg { > + union { > + u8 msg_type; > + struct pmu_perfmon_msg_generic gen; > + }; > +}; > + > + > +struct pmu_cmd { > + struct pmu_hdr hdr; > + union { > + struct pmu_zbc_cmd zbc; > + } cmd; > +}; > + > +struct pmu_msg { > + struct pmu_hdr hdr; > + union { > + struct pmu_init_msg init; > + struct pmu_perfmon_msg perfmon; > + struct pmu_rc_msg rc; > + } msg; > +}; > + > +/* write by sw, read by pmu, protected by sw mutex lock */ > +#define PMU_COMMAND_QUEUE_HPQ 0 > +/* write by sw, read by pmu, protected by sw mutex lock */ > +#define PMU_COMMAND_QUEUE_LPQ 1 > +/* write by pmu, read by sw, accessed by interrupt handler, no lock */ > +#define 
PMU_MESSAGE_QUEUE 4 > +#define PMU_QUEUE_COUNT 5 > + > +enum { > + PMU_MUTEX_ID_RSVD1 = 0, > + PMU_MUTEX_ID_GPUSER, > + PMU_MUTEX_ID_GPMUTEX, > + PMU_MUTEX_ID_I2C, > + PMU_MUTEX_ID_RMLOCK, > + PMU_MUTEX_ID_MSGBOX, > + PMU_MUTEX_ID_FIFO, > + PMU_MUTEX_ID_PG, > + PMU_MUTEX_ID_GR, > + PMU_MUTEX_ID_CLK, > + PMU_MUTEX_ID_RSVD6, > + PMU_MUTEX_ID_RSVD7, > + PMU_MUTEX_ID_RSVD8, > + PMU_MUTEX_ID_RSVD9, > + PMU_MUTEX_ID_INVALID > +}; > + > +#define PMU_IS_COMMAND_QUEUE(id) \ > + ((id) < PMU_MESSAGE_QUEUE) > + > +#define PMU_IS_SW_COMMAND_QUEUE(id) \ > + (((id) == PMU_COMMAND_QUEUE_HPQ) || \ > + ((id) == PMU_COMMAND_QUEUE_LPQ)) > + > +#define PMU_IS_MESSAGE_QUEUE(id) \ > + ((id) == PMU_MESSAGE_QUEUE) > + > +enum { > + OFLAG_READ = 0, > + OFLAG_WRITE > +}; > + > +#define QUEUE_SET (true) > + /*todo find how to get cpu_pa*/ > +#define QUEUE_GET (false) > + > +#define QUEUE_ALIGNMENT (4) > + > +#define PMU_PGENG_GR_BUFFER_IDX_INIT (0) > +#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1) > +#define PMU_PGENG_GR_BUFFER_IDX_FECS (2) > + > +enum { > + PMU_DMAIDX_UCODE = 0, > + PMU_DMAIDX_VIRT = 1, > + PMU_DMAIDX_PHYS_VID = 2, > + PMU_DMAIDX_PHYS_SYS_COH = 3, > + PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + PMU_DMAIDX_RSVD = 5, > + PMU_DMAIDX_PELPG = 6, > + PMU_DMAIDX_END = 7 > +}; > + > +#define PMU_MUTEX_ID_IS_VALID(id) \ > + ((id) < PMU_MUTEX_ID_INVALID) > + > +#define PMU_INVALID_MUTEX_OWNER_ID (0) > + > +struct pmu_mutex { > + u32 id; > + u32 index; > + u32 ref_cnt; > +}; > + > + > +#define PMU_INVALID_SEQ_DESC (~0) > + > +enum { > + PMU_SEQ_STATE_FREE = 0, > + PMU_SEQ_STATE_PENDING, > + PMU_SEQ_STATE_USED, > + PMU_SEQ_STATE_CANCELLED > +}; > + > +struct pmu_payload { > + struct { > + void *buf; > + u32 offset; > + u32 size; > + } in, out; > +}; > + > +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *, > +u32, u32); > + > +struct pmu_sequence { > + u8 id; > + u32 state; > + u32 desc; > + struct pmu_msg *msg; > + struct pmu_allocation_gk20a in_gk20a; > + struct 
pmu_allocation_gk20a out_gk20a; > + u8 *out_payload; > + pmu_callback callback; > + void *cb_params; > +}; > +struct pmu_gk20a_data { > + struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a; > + u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; > +}; > + > +#endif /*_GK20A_H__*/ > diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h > index 998410563bfd..c4686e418582 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/priv.h > +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h > @@ -2,7 +2,91 @@ > #define __NVKM_PMU_PRIV_H__ > #include <subdev/pmu.h> > #include <subdev/pmu/fuc/os.h> > +#include <core/object.h> > +#include <core/device.h> > +#include <core/parent.h> > +#include <core/mm.h> > +#include <linux/rwsem.h> > +#include <linux/slab.h> > +#include <subdev/mmu.h> > +#include <core/gpuobj.h> > > +static inline u32 u64_hi32(u64 n) > +{ > + return (u32)((n >> 32) & ~(u32)0); > +} > + > +static inline u32 u64_lo32(u64 n) > +{ > + return (u32)(n & ~(u32)0); > +} > + > +/* #define ALLOCATOR_DEBUG */ > + > +/* main struct */ > +struct nvkm_pmu_allocator { > + > + char name[32]; /* name for allocator */ > +/*struct rb_root rb_root;*/ /* rb tree root for blocks */ > + > + u32 base; /* min value of this linear space */ > + u32 limit; /* max value = limit - 1 */ > + > + unsigned long *bitmap; /* bitmap */ > + > + struct gk20a_alloc_block *block_first; /* first block in list */ > + struct gk20a_alloc_block *block_recent; /* last visited block */ > + > + u32 first_free_addr; /* first free addr, non-contigous > + allocation preferred start, > + in order to pick up small holes */ > + u32 last_free_addr; /* last free addr, contiguous > + allocation preferred start */ > + u32 cached_hole_size; /* max free hole size up to > + last_free_addr */ > + u32 block_count; /* number of blocks */ > + > + struct rw_semaphore rw_sema; /* lock */ > + struct kmem_cache *block_cache; /* slab cache */ > + > + /* if enabled, constrain to [base, limit) */ > + struct { > + bool enable; 
> + u32 base; > + u32 limit; > + } constraint; > + > + int (*alloc)(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + int (*free)(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); > + > +}; > + > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 base, u32 size); > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator); > + > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); > + > +#if defined(ALLOCATOR_DEBUG) > + > +#define allocator_dbg(alloctor, format, arg...) \ > +do { \ > + if (1) \ > + pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\ > + alloctor->name, __func__, ##arg);\ > +} while (0) > + > +#else /* ALLOCATOR_DEBUG */ > + > +#define allocator_dbg(format, arg...) > + > +#endif /* ALLOCATOR_DEBUG */ > #define nvkm_pmu_create(p, e, o, d) \ > nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) > #define nvkm_pmu_destroy(p) \ > @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *, > int _nvkm_pmu_init(struct nvkm_object *); > int _nvkm_pmu_fini(struct nvkm_object *, bool); > void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable); > +#define PMU_PG_IDLE_THRESHOLD 15000 > +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 > + > +/* state transition : > + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF > + ON => OFF is always synchronized */ > +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */ > +#define PMU_ELPG_STAT_ON 1 /* elpg is on */ > +/* elpg is off, ALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_ON_PENDING 2 > +/* elpg is on, DISALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_OFF_PENDING 3 > +/* elpg is off, caller has requested on, but ALLOW > +cmd hasn't been sent due to ENABLE_ALLOW delay 
*/ > +#define PMU_ELPG_STAT_OFF_ON_PENDING 4 > + > +/* Falcon Register index */ > +#define PMU_FALCON_REG_R0 (0) > +#define PMU_FALCON_REG_R1 (1) > +#define PMU_FALCON_REG_R2 (2) > +#define PMU_FALCON_REG_R3 (3) > +#define PMU_FALCON_REG_R4 (4) > +#define PMU_FALCON_REG_R5 (5) > +#define PMU_FALCON_REG_R6 (6) > +#define PMU_FALCON_REG_R7 (7) > +#define PMU_FALCON_REG_R8 (8) > +#define PMU_FALCON_REG_R9 (9) > +#define PMU_FALCON_REG_R10 (10) > +#define PMU_FALCON_REG_R11 (11) > +#define PMU_FALCON_REG_R12 (12) > +#define PMU_FALCON_REG_R13 (13) > +#define PMU_FALCON_REG_R14 (14) > +#define PMU_FALCON_REG_R15 (15) > +#define PMU_FALCON_REG_IV0 (16) > +#define PMU_FALCON_REG_IV1 (17) > +#define PMU_FALCON_REG_UNDEFINED (18) > +#define PMU_FALCON_REG_EV (19) > +#define PMU_FALCON_REG_SP (20) > +#define PMU_FALCON_REG_PC (21) > +#define PMU_FALCON_REG_IMB (22) > +#define PMU_FALCON_REG_DMB (23) > +#define PMU_FALCON_REG_CSW (24) > +#define PMU_FALCON_REG_CCR (25) > +#define PMU_FALCON_REG_SEC (26) > +#define PMU_FALCON_REG_CTX (27) > +#define PMU_FALCON_REG_EXCI (28) > +#define PMU_FALCON_REG_RSVD0 (29) > +#define PMU_FALCON_REG_RSVD1 (30) > +#define PMU_FALCON_REG_RSVD2 (31) > +#define PMU_FALCON_REG_SIZE (32) > + > +/* Choices for pmu_state */ > +#define PMU_STATE_OFF 0 /* PMU is off */ > +#define PMU_STATE_STARTING 1 /* PMU is on, but not booted */ > +#define PMU_STATE_INIT_RECEIVED 2 /* PMU init message received */ > +#define PMU_STATE_ELPG_BOOTING 3 /* PMU is booting */ > +#define PMU_STATE_ELPG_BOOTED 4 /* ELPG is initialized */ > +#define PMU_STATE_LOADING_PG_BUF 5 /* Loading PG buf */ > +#define PMU_STATE_LOADING_ZBC 6 /* Loading ZBC buf */ > +#define PMU_STATE_STARTED 7 /* Fully unitialized */ > + > +#define PMU_QUEUE_COUNT 5 > + > +#define PMU_MAX_NUM_SEQUENCES (256) > +#define PMU_SEQ_BIT_SHIFT (5) > +#define PMU_SEQ_TBL_SIZE \ > + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) > + > +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 > +#define 
PMU_SHA1_GID_SIGNATURE_SIZE 4 > + > +#define PMU_SHA1_GID_SIZE 16 > + > +struct pmu_queue { > + > + /* used by hw, for BIOS/SMI queue */ > + u32 mutex_id; > + u32 mutex_lock; > + /* used by sw, for LPQ/HPQ queue */ > + struct mutex mutex; > + > + /* current write position */ > + u32 position; > + /* physical dmem offset where this queue begins */ > + u32 offset; > + /* logical queue identifier */ > + u32 id; > + /* physical queue index */ > + u32 index; > + /* in bytes */ > + u32 size; > + > + /* open-flag */ > + u32 oflag; > + bool opened; /* opened implies locked */ > +}; > + > +struct pmu_sha1_gid { > + bool valid; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_sha1_gid_data { > + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_desc { > + > + struct pmu_ucode_desc *desc; > + struct pmu_buf_desc ucode; > + > + struct pmu_buf_desc pg_buf; > + /* TBD: remove this if ZBC seq is fixed */ > + struct pmu_buf_desc seq_buf; > + struct pmu_buf_desc trace_buf; > + bool buf_loaded; > + > + struct pmu_sha1_gid gid_info; > + > + struct pmu_queue queue[PMU_QUEUE_COUNT]; > + > + struct pmu_sequence *seq; > + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE]; > + u32 next_seq_desc; > + > + struct pmu_mutex *mutex; > + u32 mutex_cnt; > + > + struct mutex pmu_copy_lock; > + struct mutex pmu_seq_lock; > + > + struct nvkm_pmu_allocator dmem; > + > + u32 *ucode_image; > + bool pmu_ready; > + > + u32 zbc_save_done; > + > + u32 stat_dmem_offset; > + > + u32 elpg_stat; > + > + int pmu_state; > + > +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ > + struct work_struct isr_workq; > + struct mutex elpg_mutex; /* protect elpg enable/disable */ > +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ > + int elpg_refcnt; > + > + bool initialized; > + > + void (*remove_support)(struct pmu_desc *pmu); > + bool sw_ready; > + bool perfmon_ready; > + > + u32 sample_buffer; > + u32 load_shadow; > + u32 load_avg; > + > + struct 
mutex isr_mutex; > + bool isr_enabled; > + > + bool zbc_ready; > + unsigned long perfmon_events_cnt; > + bool perfmon_sampling_enabled; > + u8 pmu_mode; > + u32 falcon_id; > + u32 aelpg_param[5]; > + void *pmu_chip_data; > + struct nvkm_pmu *pmu; > +}; > > struct nvkm_pmu_impl { > struct nvkm_oclass base; > @@ -39,5 +296,12 @@ struct nvkm_pmu_impl { > } data; > > void (*pgob)(struct nvkm_pmu *, bool); > + struct pmu_desc pmudata; > }; > + > +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu) > +{ > + return pmu->pmu; > +} > + > #endif > -- > 1.9.1 > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Hi Mirkin, Your observations are quite correct. After the boot code is submitted successfully, I will submit the code to configure & enable features of PMU. (This will be done by sending cmds to PMU.) Now talking about this patch: Apart from just the boot code, I have also included some things in this patch that I can remove for now (I will include these things in later digestible chunks): - Debugfs support (can be removed for now) - Debug support for dumping PMU falcon registers (can be removed for now) - PMU interacts with Kernel via interrupt mechanism. For interaction with PMU, we have defined command structs, functions to prepare/validate and send commands to PMU. This infrastructure is basically to send commands to PMU. (Right now it can be removed, though we still need to receive messages from PMU to know whether it has booted successfully.) But this will be all that I will be able to remove from this patch. Can I go ahead with removing the items suggested above? Regards, Deepak G -----Original Message----- From: ibmirkin at gmail.com [mailto:ibmirkin at gmail.com] On Behalf Of Ilia Mirkin Sent: Wednesday, March 11, 2015 10:41 PM To: Deepak Goyal Cc: Ben Skeggs; Alexandre Courbot; nouveau at lists.freedesktop.org; linux-tegra at vger.kernel.org Subject: Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support. Hi Deepak, There's... a lot of stuff going on here. Can you describe the goal of this patch (which could then be used as the patch commit message)? The current one basically boils down to "Add support for loading PMU", but merely loading the fw into a fuc engine is just a handful of lines of code. Also, except in rare cases, it's customary to split up patches of this size into smaller, more reviewable chunks, which add on bits of functionality as they go. From what I can tell, you're adding the kernel-side interface for a hypothetical (and presumably closed-source) PMU blob that NVIDIA will supply.
In essence, the blob is expected to implement a RTOS which runs on the PMU's falcon CPU. There are a bunch of API's implemented by this blob that the host can call, but it also does things on its own. For the kernel side, each of these API calls should probably be a separate patch (after an initial "just load it and do nothing" style patch). Or perhaps have the infrastructure that you add first and then something that implements the API calls. Cheers, -ilia On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal at nvidia.com> wrote:> It adds PMU boot support.It loads PMU > firmware into PMU falcon.RM/Kernel driver > receives INIT ack (through interrupt mechanism) > from PMU when PMU boots with success. > > Signed-off-by: Deepak Goyal <dgoyal at nvidia.com> > --- > drm/nouveau/include/nvkm/subdev/pmu.h | 26 +- > drm/nouveau/nvkm/subdev/pmu/base.c | 108 ++ > drm/nouveau/nvkm/subdev/pmu/gk20a.c | 2131 ++++++++++++++++++++++++++++++++- > drm/nouveau/nvkm/subdev/pmu/gk20a.h | 369 ++++++ > drm/nouveau/nvkm/subdev/pmu/priv.h | 264 ++++ > 5 files changed, 2884 insertions(+), 14 deletions(-) > create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h > > diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h > index 7b86acc634a0..659b4e0ba02b 100644 > --- a/drm/nouveau/include/nvkm/subdev/pmu.h > +++ b/drm/nouveau/include/nvkm/subdev/pmu.h > @@ -1,7 +1,20 @@ > #ifndef __NVKM_PMU_H__ > #define __NVKM_PMU_H__ > #include <core/subdev.h> > +#include <core/device.h> > +#include <subdev/mmu.h> > +#include <linux/debugfs.h> > > +struct pmu_buf_desc { > + struct nvkm_gpuobj *pmubufobj; > + struct nvkm_vma pmubufvma; > + size_t size; > +}; > +struct pmu_priv_vm { > + struct nvkm_gpuobj *mem; > + struct nvkm_gpuobj *pgd; > + struct nvkm_vm *vm; > +}; > struct nvkm_pmu { > struct nvkm_subdev base; > > @@ -20,9 +33,20 @@ struct nvkm_pmu { > u32 message; > u32 data[2]; > } recv; > - > + wait_queue_head_t init_wq; > + bool gr_initialised; > + struct dentry 
*debugfs; > + struct pmu_buf_desc *pg_buf; > + struct pmu_priv_vm *pmuvm; > int (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); > void (*pgob)(struct nvkm_pmu *, bool); > + int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token); > + int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token); > + int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load); > + int (*pmu_load_update)(struct nvkm_pmu *pmu); > + void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu); > + void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles, > + u32 *total_cycles); > }; > > static inline struct nvkm_pmu * > diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c > index 054b2d2eec35..6afd389b9764 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/base.c > +++ b/drm/nouveau/nvkm/subdev/pmu/base.c > @@ -25,6 +25,114 @@ > > #include <subdev/timer.h> > > +/* init allocator struct */ > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 start, u32 len) > +{ > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > + > + strncpy(allocator->name, name, 32); > + > + allocator->base = start; > + allocator->limit = start + len - 1; > + > + allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long), > + GFP_KERNEL); > + if (!allocator->bitmap) > + return -ENOMEM; > + > + allocator_dbg(allocator, "%s : base %d, limit %d", > + allocator->name, allocator->base); > + > + init_rwsem(&allocator->rw_sema); > + > + allocator->alloc = nvkm_pmu_allocator_block_alloc; > + allocator->free = nvkm_pmu_allocator_block_free; > + > + return 0; > +} > + > +/* destroy allocator, free all remaining blocks if any */ > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator) > +{ > + down_write(&allocator->rw_sema); > + > + kfree(allocator->bitmap); > + > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > +} > + > +/* > + * *addr != ~0 for fixed address allocation. 
if *addr == 0, base addr is > + * returned to caller in *addr. > + * > + * contiguous allocation, which allocates one block of > + * contiguous address. > +*/ > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align) > +{ > + unsigned long _addr; > + > + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); > + > + if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ > + *addr + len > allocator->limit || /* check addr range */ > + *addr & (align - 1) || /* check addr alignment */ > + len == 0) /* check len */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -ENOMEM; > + > + down_write(&allocator->rw_sema); > + > + _addr = bitmap_find_next_zero_area(allocator->bitmap, > + allocator->limit - allocator->base + 1, > + *addr ? (*addr - allocator->base) : 0, > + len, > + align - 1); > + if ((_addr > allocator->limit - allocator->base + 1) || > + (*addr && *addr != (_addr + allocator->base))) { > + up_write(&allocator->rw_sema); > + return -ENOMEM; > + } > + > + bitmap_set(allocator->bitmap, _addr, len); > + *addr = allocator->base + _addr; > + > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); > + > + return 0; > +} > + > +/* free all blocks between start and end */ > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align) > +{ > + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); > + > + if (addr + len > allocator->limit || /* check addr range */ > + addr < allocator->base || > + addr & (align - 1)) /* check addr alignment */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -EINVAL; > + > + down_write(&allocator->rw_sema); > + bitmap_clear(allocator->bitmap, addr - allocator->base, len); > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); > + > + return 0; > +} > + > 
void > nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable) > { > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > index a49934bbe637..0fd2530301a3 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c > @@ -20,21 +20,67 @@ > * DEALINGS IN THE SOFTWARE. > */ > #include "priv.h" > +#include "gk20a.h" > +#include <core/client.h> > +#include <core/gpuobj.h> > +#include <subdev/bar.h> > +#include <subdev/fb.h> > +#include <subdev/mc.h> > +#include <subdev/timer.h> > +#include <subdev/mmu.h> > +#include <subdev/pmu.h> > +#include <engine/falcon.h> > > +#include <linux/delay.h> /* for mdelay */ > +#include <linux/firmware.h> > +#include <linux/clk.h> > +#include <linux/module.h> > +#include <linux/debugfs.h> > +#include <linux/dma-mapping.h> > +#include <linux/uaccess.h> > #include <subdev/clk.h> > #include <subdev/timer.h> > #include <subdev/volt.h> > > #define BUSY_SLOT 0 > #define CLK_SLOT 7 > +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" > + > +static int falc_trace_show(struct seq_file *s, void *data); > +static int falc_trace_open(struct inode *inode, struct file *file) > +{ > + return single_open(file, falc_trace_show, inode->i_private); > +} > +static const struct file_operations falc_trace_fops = { > + .open = falc_trace_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > +struct pmu_priv_vm pmuvm; > +const struct firmware *pmufw; > + > +static void gk20a_pmu_isr(struct nvkm_pmu *ppmu); > +static void pmu_process_message(struct work_struct *work); > + > +static int > +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw); > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw); > + > +static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw); > +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu); > +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu 
*ppmu, struct nvkm_mc *pmc); > +static void gk20a_pmu_intr(struct nvkm_subdev *subdev); > > +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable); > struct gk20a_pmu_dvfs_data { > int p_load_target; > int p_load_max; > int p_smooth; > unsigned int avg_load; > }; > - > struct gk20a_pmu_priv { > struct nvkm_pmu base; > struct nvkm_alarm alarm; > @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status { > unsigned long busy; > int cur_state; > }; > - > +int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu) > +{ > + struct dentry *d; > + ppmu->debugfs = debugfs_create_dir("PMU", NULL); > + if (!ppmu->debugfs) > + goto err_out; > + nv_debug(ppmu, "PMU directory created with success\n"); > + d = debugfs_create_file( > + "falc_trace", 0644, ppmu->debugfs, ppmu, > + &falc_trace_fops); > + if (!d) > + goto err_out; > + return 0; > +err_out: > + pr_err("%s: Failed to make debugfs node\n", __func__); > + debugfs_remove_recursive(ppmu->debugfs); > + return -ENOMEM; > +} > +void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu, > + const struct firmware *pfw) > +{ > + nv_debug(ppmu, "firmware released\n"); > + release_firmware(pfw); > +} > static int > gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) > { > @@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend) > { > struct nvkm_pmu *pmu = (void *)object; > struct gk20a_pmu_priv *priv = (void *)pmu; > - > + nv_wr32(pmu, 0x10a014, 0x00000060); > + flush_work(&pmu->recv.work); > nvkm_timer_alarm_cancel(priv, &priv->alarm); > > return nvkm_subdev_fini(&pmu->base, suspend); > } > +static bool find_hex_in_string(char *strings, u32 *hex_pos) > +{ > + u32 i = 0, j = strlen(strings); > + for (; i < j; i++) { > + if (strings[i] == '%') > + if (strings[i + 1] == 'x' || strings[i + 1] == 'X') { > + *hex_pos = i; > + return true; > + } > + } > + *hex_pos = -1; > + return false; > +} > +static int falc_trace_show(struct seq_file *s, void *data) > +{ > + struct nvkm_pmu *ppmu = s->private; > + struct 
nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u32 i = 0, j = 0, k, l, m; > + char part_str[40]; > + u32 data1; > + char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL); > + char *trace = log_data; > + u32 *trace1 = (u32 *)log_data; > + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) { > + data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i); > + memcpy(log_data + i, (void *)(&data1), 32); > + } > + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) { > + for (j = 0; j < 0x40; j++) > + if (trace1[(i / 4) + j]) > + break; > + if (j == 0x40) > + goto out; > + seq_printf(s, "Index %x: ", trace1[(i / 4)]); > + l = 0; > + m = 0; > + while (find_hex_in_string((trace+i+20+m), &k)) { > + if (k >= 40) > + break; > + strncpy(part_str, (trace+i+20+m), k); > + part_str[k] = 0; > + seq_printf(s, "%s0x%x", part_str, > + trace1[(i / 4) + 1 + l]); > + l++; > + m += k + 2; > + } > + seq_printf(s, "%s", (trace+i+20+m)); > + } > +out: > + kfree(log_data); > + return 0; > +} > > int > gk20a_pmu_init(struct nvkm_object *object) > { > - struct nvkm_pmu *pmu = (void *)object; > - struct gk20a_pmu_priv *priv = (void *)pmu; > + struct nvkm_pmu *ppmu = (void *)object; > + struct nvkm_mc *pmc = nvkm_mc(object); > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu; > + struct gk20a_pmu_priv *priv; > + struct pmu_gk20a_data *gk20adata; > int ret; > > - ret = nvkm_subdev_init(&pmu->base); > + pmu = &impl->pmudata; > + > + nv_subdev(ppmu)->intr = gk20a_pmu_intr; > + > + mutex_init(&pmu->isr_mutex); > + mutex_init(&pmu->pmu_copy_lock); > + mutex_init(&pmu->pmu_seq_lock); > + > + if (pmufw == NULL) { > + ret = gk20a_pmu_load_firmware(ppmu, &pmufw); > + if (ret < 0) { > + nv_error(ppmu, "failed to load pmu fimware\n"); > + return ret; > + } > + nv_debug(ppmu, "loading firmware sucessful\n"); > + ret = gk20a_pmu_init_vm(ppmu, pmufw); > + if (ret < 0) { > + nv_error(ppmu, "failed to map pmu fw to va space\n"); > 
+ goto init_vm_err; > + } > + } > + pmu->desc = (struct pmu_ucode_desc *)pmufw->data; > + gk20a_pmu_dump_firmware_info(ppmu, pmufw); > + > + if (pmu->desc->app_version != APP_VERSION_GK20A) { > + nv_error(ppmu, > + "PMU code version not supported version: %d\n", > + pmu->desc->app_version); > + ret = -EINVAL; > + goto app_ver_err; > + } > + gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL); > + if (!gk20adata) { > + ret = -ENOMEM; > + goto err; > + } > + > + pmu->pmu_chip_data = (void *)gk20adata; > + > + pmu->remove_support = gk20a_remove_pmu_support; > + > + ret = gk20a_init_pmu_setup_sw(ppmu); > if (ret) > - return ret; > + goto err; > + > + pmu->pmu_state = PMU_STATE_STARTING; > + ret = gk20a_init_pmu_setup_hw1(ppmu, pmc); > + if (ret) > + goto err; > + > + priv = (void *)ppmu; > > - pmu->pgob = nvkm_pmu_pgob; > + ret = nvkm_subdev_init(&ppmu->base); > + if (ret) > + goto err; > + > + ppmu->pgob = nvkm_pmu_pgob; > > - /* init pwr perf counter */ > - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > + /* init pmu perf counter */ > + nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); > + nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); > + nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); > > - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); > + nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm); > +err: > +init_vm_err: > +app_ver_err: > + gk20a_pmu_release_firmware(ppmu, pmufw); > return ret; > } > > @@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { > .init = gk20a_pmu_init, > .fini = gk20a_pmu_fini, > }, > + .base.handle = NV_SUBDEV(PMU, 0xea), > + .pgob = gk20a_pmu_pgob, > }.base; > +void pmu_copy_from_dmem(struct pmu_desc *pmu, > + u32 src, u8 *dst, u32 size, u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *dst_u32 = (u32 *)dst; > + struct nvkm_pmu *ppmu = (void 
*)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (size == 0) { > + nv_error(ppmu, "size is zero\n"); > + goto out; > + } > + > + if (src & 0x3) { > + nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src); > + goto out; > + } > + > + mutex_lock(&pmu->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = (0x3f << 2) | 0xff << 8; > + > + src &= addr_mask; > + > + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); > + > + for (i = 0; i < words; i++) { > + dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); > + nv_debug(ppmu, "0x%08x\n", dst_u32[i]); > + } > + if (bytes > 0) { > + data = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); > + nv_debug(ppmu, "0x%08x\n", data); > + > + for (i = 0; i < bytes; i++) > + dst[(words << 2) + i] = ((u8 *)&data)[i]; > + } > + mutex_unlock(&pmu->pmu_copy_lock); > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +void pmu_copy_to_dmem(struct pmu_desc *pmu, > + u32 dst, u8 *src, u32 size, u8 port) > +{ > + u32 i, words, bytes; > + u32 data, addr_mask; > + u32 *src_u32 = (u32 *)src; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (size == 0) { > + nv_error(ppmu, "size is zero\n"); > + goto out; > + } > + > + if (dst & 0x3) { > + nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst); > + goto out; > + } > + > + mutex_lock(&pmu->pmu_copy_lock); > + > + words = size >> 2; > + bytes = size & 0x3; > + > + addr_mask = (0x3f << 2) | 0xff << 8; > + > + dst &= addr_mask; > + > + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); > + > + for (i = 0; i < words; i++) { > + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]); > + nv_debug(ppmu, "0x%08x\n", src_u32[i]); > + } > + if (bytes > 0) { > + data = 0; > + for (i = 0; i < bytes; i++) > + ((u8 *)&data)[i] = src[(words << 2) + i]; > + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data); > + nv_debug(ppmu, "0x%08x\n", data); > + } > + > + data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & 
addr_mask; > + size = ALIGN(size, 4); > + if (data != dst + size) { > + nv_error(ppmu, "copy failed. bytes written %d, expected %d", > + data - dst, size); > + } > + mutex_unlock(&pmu->pmu_copy_lock); > +out: > + nv_debug(ppmu, "exit %s", __func__); > +} > + > +static int pmu_idle(struct nvkm_pmu *ppmu) > +{ > + unsigned long end_jiffies = jiffies + > + msecs_to_jiffies(2000); > + u32 idle_stat; > + > + /* wait for pmu idle */ > + do { > + idle_stat = nv_rd32(ppmu, 0x0010a04c); > + > + if (((idle_stat & 0x01) == 0) && > + ((idle_stat >> 1) & 0x7fff) == 0) { > + break; > + } > + > + if (time_after_eq(jiffies, end_jiffies)) { > + nv_error(ppmu, "timeout waiting pmu idle : 0x%08x", > + idle_stat); > + return -EBUSY; > + } > + usleep_range(100, 200); > + } while (1); > + > + return 0; > +} > + > +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + > + nv_wr32(pmc, 0x00000640, > + nv_rd32(pmc, 0x00000640) & > + ~0x1000000); > + nv_wr32(pmc, 0x00000644, > + nv_rd32(pmc, 0x00000644) & > + ~0x1000000); > + nv_wr32(ppmu, 0x0010a014, 0xff); > + > + if (enable) { > + nv_debug(ppmu, "enable pmu irq\n"); > + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1 > + nv_wr32(ppmu, 0x0010a01c, 0xff01ff52); > + 0=disable, 1=enable*/ > + > + nv_wr32(ppmu, 0x0010a010, 0xff); > + nv_wr32(pmc, 0x00000640, > + nv_rd32(pmc, 0x00000640) | > + 0x1000000); > + nv_wr32(pmc, 0x00000644, > + nv_rd32(pmc, 0x00000644) | > + 0x1000000); > + } else { > + nv_debug(ppmu, "disable pmu irq\n"); > + } > + > +} > + > +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + u32 reg; > + > + if (enable) { > + int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT; > + /*need a spinlock?*/ > + reg = nv_rd32(pmc, 0x00000200); > + reg |= 0x2000; > + nv_wr32(pmc, 0x00000200, reg); > + nv_rd32(pmc, 0x00000200); > + do { > + u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6; > + > + if (!w) > + return 0; > + > + udelay(GK20A_IDLE_CHECK_DEFAULT); 
> + } while (--retries); > + > + reg = nv_rd32(pmc, 0x00000200); > + reg &= ~0x2000; > + nv_wr32(pmc, 0x00000200, reg); > + nv_error(ppmu, "Falcon mem scrubbing timeout\n"); > + > + goto error; > + } else { > + reg = nv_rd32(pmc, 0x00000200); > + reg &= ~0x2000; > + nv_wr32(pmc, 0x00000200, reg); > + return 0; > + } > +error: > + return -ETIMEDOUT; > +} > + > +static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, > + bool enable) > +{ > + u32 pmc_enable; > + int err; > + > + if (!enable) { > + pmc_enable = nv_rd32(pmc, 0x200); > + if ((pmc_enable & 0x2000) != 0x0) { > + pmu_enable_irq(ppmu, pmc, false); > + pmu_enable_hw(ppmu, pmc, false); > + } > + } else { > + err = pmu_enable_hw(ppmu, pmc, true); > + if (err) > + return err; > + > + /* TBD: post reset */ > + > + err = pmu_idle(ppmu); > + if (err) > + return err; > + > + pmu_enable_irq(ppmu, pmc, true); > + } > + > + return 0; > +} > + > +int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + int err; > + > + err = pmu_idle(ppmu); > + if (err) > + return err; > + > + /* TBD: release pmu hw mutex */ > + > + err = pmu_enable(ppmu, pmc, false); > + if (err) > + return err; > + > + err = pmu_enable(ppmu, pmc, true); > + if (err) > + return err; > + > + return 0; > +} > + > +static int pmu_bootstrap(struct pmu_desc *pmu) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_ucode_desc *desc = pmu->desc; > + u64 addr_code, addr_data, addr_load; > + u32 i, blocks, addr_args; > + u32 *adr_data, *adr_load, *adr_code; > + struct pmu_cmdline_args_gk20a cmdline_args; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + > + nv_wr32(ppmu, 0x0010a048, > + nv_rd32(ppmu, 0x0010a048) | 0x01); > + /*bind the address*/ > + nv_wr32(ppmu, 0x0010a480, > + ppmuvm->mem->addr >> 12 | > + 0x1 << 30 | > + 0x20000000); > + > + /* TBD: load all other surfaces */ > + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; > + cmdline_args.falc_trace_dma_base > + 
u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8); > + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; > + cmdline_args.cpu_freq_hz = 204; > + cmdline_args.secure_mode = 0; > + > + addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff; > + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; > + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); > + nv_debug(ppmu, "initiating copy to dmem\n"); > + pmu_copy_to_dmem(pmu, addr_args, > + (u8 *)&cmdline_args, > + sizeof(struct pmu_cmdline_args_gk20a), 0); > + > + nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24); > + > + > + addr_code = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->app_start_offset + > + desc->app_resident_code_offset) >> 8); > + > + addr_data = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->app_start_offset + > + desc->app_resident_data_offset) >> 8); > + > + addr_load = u64_lo32((pmu->ucode.pmubufvma.offset + > + desc->bootloader_start_offset) >> 8); > + > + adr_code = (u32 *) (&addr_code); > + adr_load = (u32 *) (&addr_load); > + adr_data = (u32 *) (&addr_data); > + nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); > + nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); > + nv_debug(ppmu, "0x%08x\n", *(adr_code)); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_size); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry); > + nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_data)); > + nv_debug(ppmu, "0x%08x\n", *(adr_data)); > + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size); > + nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size); > + nv_wr32(ppmu, 0x0010a1c4, *(adr_code)); > + nv_debug(ppmu, "0x%08x\n", *(adr_code)); > + nv_wr32(ppmu, 0x0010a1c4, 0x1); > + nv_debug(ppmu, "0x%08x\n", 1); > + nv_wr32(ppmu, 0x0010a1c4, addr_args); > + nv_debug(ppmu, 
"0x%08x\n", addr_args); > + > + > + nv_wr32(ppmu, 0x0010a110, > + *(adr_load) - (desc->bootloader_imem_offset >> 8)); > + > + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; > + > + for (i = 0; i < blocks; i++) { > + nv_wr32(ppmu, 0x0010a114, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(ppmu, 0x0010a11c, > + desc->bootloader_imem_offset + (i << 8)); > + nv_wr32(ppmu, 0x0010a118, > + 0x01 << 4 | > + 0x06 << 8 | > + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); > + } > + > + > + nv_wr32(ppmu, 0x0010a104, > + (0xffffffff & desc->bootloader_entry_point)); > + > + nv_wr32(ppmu, 0x0010a100, 0x1 << 1); > + > + nv_wr32(ppmu, 0x0010a080, desc->app_version); > + > + return 0; > +} > + > +void pmu_seq_init(struct pmu_desc *pmu) > +{ > + u32 i; > + > + memset(pmu->seq, 0, > + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES); > + memset(pmu->pmu_seq_tbl, 0, > + sizeof(pmu->pmu_seq_tbl)); > + > + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++) > + pmu->seq[i].id = i; > +} > + > +static int pmu_seq_acquire(struct pmu_desc *pmu, > + struct pmu_sequence **pseq) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_sequence *seq; > + u32 index; > + > + mutex_lock(&pmu->pmu_seq_lock); > + index = find_first_zero_bit(pmu->pmu_seq_tbl, > + sizeof(pmu->pmu_seq_tbl)); > + if (index >= sizeof(pmu->pmu_seq_tbl)) { > + nv_error(ppmu, > + "no free sequence available"); > + mutex_unlock(&pmu->pmu_seq_lock); > + return -EAGAIN; > + } > + set_bit(index, pmu->pmu_seq_tbl); > + mutex_unlock(&pmu->pmu_seq_lock); > + > + seq = &pmu->seq[index]; > + seq->state = PMU_SEQ_STATE_PENDING; > + > + *pseq = seq; > + return 0; > +} > + > +static void pmu_seq_release(struct pmu_desc *pmu, > + struct pmu_sequence *seq) > +{ > + seq->state = PMU_SEQ_STATE_FREE; > + seq->desc = PMU_INVALID_SEQ_DESC; > + seq->callback = NULL; > + seq->cb_params = NULL; > + seq->msg = NULL; > + seq->out_payload = NULL; > + seq->in_gk20a.alloc.dmem.size = 0; > + 
seq->out_gk20a.alloc.dmem.size = 0; > + clear_bit(seq->id, pmu->pmu_seq_tbl); > +} > + > +static int pmu_queue_init(struct pmu_desc *pmu, > + u32 id, struct pmu_init_msg_pmu_gk20a *init) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue = &pmu->queue[id]; > + > + queue->id = id; > + queue->index = init->queue_info[id].index; > + queue->offset = init->queue_info[id].offset; > + queue->size = init->queue_info[id].size; > + queue->mutex_id = id; > + mutex_init(&queue->mutex); > + > + nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x", > + id, queue->index, queue->offset, queue->size); > + > + return 0; > +} > + > +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue, > + u32 *head, bool set) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + BUG_ON(!head); > + > + if (PMU_IS_COMMAND_QUEUE(queue->id)) { > + > + if (queue->index >= 0x00000004) > + return -EINVAL; > + > + if (!set) > + *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) & > + 0xffffffff; > + else > + nv_wr32(ppmu, > + (0x0010a4a0 + (queue->index * 4)), > + (*head & 0xffffffff)); > + } else { > + if (!set) > + *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff; > + else > + nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff)); > + } > + > + return 0; > +} > + > +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue, > + u32 *tail, bool set) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + BUG_ON(!tail); > + > + if (PMU_IS_COMMAND_QUEUE(queue->id)) { > + > + if (queue->index >= 0x00000004) > + return -EINVAL; > + > + if (!set) > + *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) & > + 0xffffffff; > + else > + nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)), > + (*tail & 0xffffffff)); > + } else { > + if (!set) > + *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; > + else > + nv_wr32(ppmu, 0x0010a4cc, (*tail & 
0xffffffff)); > + } > + > + return 0; > +} > + > +static inline void pmu_queue_read(struct pmu_desc *pmu, > + u32 offset, u8 *dst, u32 size) > +{ > + pmu_copy_from_dmem(pmu, offset, dst, size, 0); > +} > + > +static inline void pmu_queue_write(struct pmu_desc *pmu, > + u32 offset, u8 *src, u32 size) > +{ > + pmu_copy_to_dmem(pmu, offset, src, size, 0); > +} > + > +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_mutex *mutex; > + u32 data, owner, max_retry; > + > + if (!pmu->initialized) > + return -EINVAL; > + > + BUG_ON(!token); > + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); > + BUG_ON(id > pmu->mutex_cnt); > + > + mutex = &pmu->mutex[id]; > + > + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; > + > + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) { > + BUG_ON(mutex->ref_cnt == 0); > + nv_debug(ppmu, "already acquired by owner : 0x%08x", *token); > + mutex->ref_cnt++; > + return 0; > + } > + > + max_retry = 40; > + do { > + data = nv_rd32(ppmu, 0x0010a488) & 0xff; > + if (data == 0x00000000 || > + data == 0x000000ff) { > + nv_warn(ppmu, > + "fail to generate mutex token: val 0x%08x", > + owner); > + usleep_range(20, 40); > + continue; > + } > + > + owner = data; > + nv_wr32(ppmu, (0x0010a580 + mutex->index * 4), > + owner & 0xff); > + > + data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)); > + > + if (owner == data) { > + mutex->ref_cnt = 1; > + nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x", > + mutex->index, *token); > + *token = owner; > + goto out; > + } else { > + nv_debug(ppmu, "fail to acquire mutex idx=0x%08x", > + mutex->index); > + > + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); > + > + usleep_range(20, 40); > + continue; > + } > + } while (max_retry-- > 0); > + > + return -EBUSY; > +out: > + return 0; > +} > + > +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token) > +{ > 
+ struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_mutex *mutex; > + u32 owner; > + > + if (!pmu->initialized) > + return -EINVAL; > + > + BUG_ON(!token); > + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id)); > + BUG_ON(id > pmu->mutex_cnt); > + > + mutex = &pmu->mutex[id]; > + > + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff; > + > + if (*token != owner) { > + nv_error(ppmu, > + "requester 0x%08x NOT match owner 0x%08x", > + *token, owner); > + return -EINVAL; > + } > + > + if (--mutex->ref_cnt > 0) > + return -EBUSY; > + > + nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00); > + > + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff)); > + > + nv_debug(ppmu, "mutex released: id=%d, token=0x%x", > + mutex->index, *token); > + > + return 0; > +} > + > +static int pmu_queue_lock(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int ret; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (PMU_IS_MESSAGE_QUEUE(queue->id)) > + return 0; > + > + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { > + mutex_lock(&queue->mutex); > + return 0; > + } > + > + ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock); > + return ret; > +} > + > +static int pmu_queue_unlock(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int ret; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + if (PMU_IS_MESSAGE_QUEUE(queue->id)) > + return 0; > + > + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) { > + mutex_unlock(&queue->mutex); > + return 0; > + } > + > + ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock); > + return ret; > +} > + > +/* called by pmu_read_message, no lock */ > +static bool pmu_queue_is_empty(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + u32 head, tail; > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + if (queue->opened && queue->oflag == OFLAG_READ) > + tail = queue->position; > + 
else > + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); > + > + return head == tail; > +} > + > +static bool pmu_queue_has_room(struct pmu_desc *pmu, > + struct pmu_queue *queue, u32 size, bool *need_rewind) > +{ > + u32 head, tail, free; > + bool rewind = false; > + > + size = ALIGN(size, QUEUE_ALIGNMENT); > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET); > + > + if (head >= tail) { > + free = queue->offset + queue->size - head; > + free -= PMU_CMD_HDR_SIZE; > + > + if (size > free) { > + rewind = true; > + head = queue->offset; > + } > + } > + > + if (head < tail) > + free = tail - head - 1; > + > + if (need_rewind) > + *need_rewind = rewind; > + > + return size <= free; > +} > + > +static int pmu_queue_push(struct pmu_desc *pmu, > + struct pmu_queue *queue, void *data, u32 size) > +{ > + > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + if (!queue->opened && queue->oflag == OFLAG_WRITE) { > + nv_error(ppmu, "queue not opened for write\n"); > + return -EINVAL; > + } > + > + pmu_queue_write(pmu, queue->position, data, size); > + queue->position += ALIGN(size, QUEUE_ALIGNMENT); > + return 0; > +} > + > +static int pmu_queue_pop(struct pmu_desc *pmu, > + struct pmu_queue *queue, void *data, u32 size, > + u32 *bytes_read) > +{ > + u32 head, tail, used; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + *bytes_read = 0; > + > + if (!queue->opened && queue->oflag == OFLAG_READ) { > + nv_error(ppmu, "queue not opened for read\n"); > + return -EINVAL; > + } > + > + pmu_queue_head(pmu, queue, &head, QUEUE_GET); > + tail = queue->position; > + > + if (head == tail) > + return 0; > + > + if (head > tail) > + used = head - tail; > + else > + used = queue->offset + queue->size - tail; > + > + if (size > used) { > + nv_warn(ppmu, "queue size smaller than request read\n"); > + size = used; > + } > + > + pmu_queue_read(pmu, tail, data, size); > + 
queue->position += ALIGN(size, QUEUE_ALIGNMENT); > + *bytes_read = size; > + return 0; > +} > + > +static void pmu_queue_rewind(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + struct pmu_cmd cmd; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + > + if (!queue->opened) { > + nv_error(ppmu, "queue not opened\n"); > + goto out; > + } > + > + if (queue->oflag == OFLAG_WRITE) { > + cmd.hdr.unit_id = PMU_UNIT_REWIND; > + cmd.hdr.size = PMU_CMD_HDR_SIZE; > + pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size); > + nv_debug(ppmu, "queue %d rewinded\n", queue->id); > + } > + > + queue->position = queue->offset; > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +/* open for read and lock the queue */ > +static int pmu_queue_open_read(struct pmu_desc *pmu, > + struct pmu_queue *queue) > +{ > + int err; > + > + err = pmu_queue_lock(pmu, queue); > + if (err) > + return err; > + > + if (queue->opened) > + BUG(); > + > + pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET); > + queue->oflag = OFLAG_READ; > + queue->opened = true; > + > + return 0; > +} > + > +/* open for write and lock the queue > + make sure there's enough free space for the write */ > +static int pmu_queue_open_write(struct pmu_desc *pmu, > + struct pmu_queue *queue, u32 size) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + bool rewind = false; > + int err; > + > + err = pmu_queue_lock(pmu, queue); > + if (err) > + return err; > + > + if (queue->opened) > + BUG(); > + > + if (!pmu_queue_has_room(pmu, queue, size, &rewind)) { > + nv_error(ppmu, "queue full"); > + pmu_queue_unlock(pmu, queue); > + return -EAGAIN; > + } > + > + pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET); > + queue->oflag = OFLAG_WRITE; > + queue->opened = true; > + > + if (rewind) > + pmu_queue_rewind(pmu, queue); > + > + return 0; > +} > + > +/* close and unlock the queue */ > +static int pmu_queue_close(struct pmu_desc *pmu, > + 
struct pmu_queue *queue, bool commit) > +{ > + if (!queue->opened) > + return 0; > + > + if (commit) { > + if (queue->oflag == OFLAG_READ) { > + pmu_queue_tail(pmu, queue, > + &queue->position, QUEUE_SET); > + } else { > + pmu_queue_head(pmu, queue, > + &queue->position, QUEUE_SET); > + } > + } > + > + queue->opened = false; > + > + pmu_queue_unlock(pmu, queue); > + > + return 0; > +} > + > +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, > + u32 *var, u32 val) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); > + unsigned long delay = GK20A_IDLE_CHECK_DEFAULT; > + > + do { > + if (*var == val) > + return 0; > + > + if (nv_rd32(ppmu, 0x0010a008)) > + gk20a_pmu_isr(ppmu); > + > + usleep_range(delay, delay * 2); > + delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX); > + } while (time_before(jiffies, end_jiffies)); > + > + return -ETIMEDOUT; > +} > + > +void pmu_dump_falcon_stats(struct pmu_desc *pmu) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + int i; > + > + nv_debug(ppmu, "pmu_falcon_os_r : %d\n", > + nv_rd32(ppmu, 0x0010a080)); > + nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a100)); > + nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a04c)); > + nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a040)); > + nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a044)); > + nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a008)); > + nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a00c)); > + nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a018)); > + nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a01c)); > + > + for (i = 0; i < 0x0000000c; i++) > + nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n", > + i, nv_rd32(ppmu, 
0x0010a450 + i*4)); > + > + for (i = 0; i < 0x00000004; i++) > + nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n", > + i, nv_rd32(ppmu, 0x0010a5c0 + i*4)); > + > + for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) { > + nv_wr32(ppmu, 0x0010a200, > + 0xe | > + (i & 0x1f) << 8); > + nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n", > + i, nv_rd32(ppmu, 0x0010a20c)); > + } > + > + i = nv_rd32(ppmu, 0x0010a7b0); > + nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i); > + if (i != 0) { > + nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a0)); > + nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a4)); > + nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7a8)); > + nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a7ac)); > + } > + > + i = nv_rd32(ppmu, 0x0010a988); > + nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i); > + > + i = nv_rd32(ppmu, 0x0010a16c); > + nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i); > + if (((i >> 31) & 0x1)) { > + nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a168)); > + /*nv_debug(ppmu, "pmc_enable : 0x%x\n", > + nv_rd32(pmc, 0x00000200));*/ > + } > + > + nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a0a4)); > + nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a050)); > + nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n", > + nv_rd32(ppmu, 0x0010a054)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_IMB & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_DMB & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_CSW & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + 
nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_CTX & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_EXCI & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + for (i = 0; i < 4; i++) { > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_PC & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + > + nv_wr32(ppmu, 0x0010a200, > + 0x8 | > + ((PMU_FALCON_REG_SP & 0x1f) << 8)); > + nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n", > + nv_rd32(ppmu, 0x0010a20c)); > + } > + > + /* PMU may crash due to FECS crash. Dump FECS status */ > + /*gk20a_fecs_dump_falcon_stats(g);*/ > +} > + > +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, > + struct pmu_msg *msg, struct pmu_payload *payload, > + u32 queue_id) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue; > + u32 in_size, out_size; > + > + nv_debug(ppmu, "pmu validate cmd\n"); > + pmu_dump_falcon_stats(pmu); > + > + if (!PMU_IS_SW_COMMAND_QUEUE(queue_id)) > + goto invalid_cmd; > + > + queue = &pmu->queue[queue_id]; > + if (cmd->hdr.size < PMU_CMD_HDR_SIZE) > + goto invalid_cmd; > + > + if (cmd->hdr.size > (queue->size >> 1)) > + goto invalid_cmd; > + > + if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE) > + goto invalid_cmd; > + > + if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id)) > + goto invalid_cmd; > + > + if (payload == NULL) > + return true; > + > + if (payload->in.buf == NULL && payload->out.buf == NULL) > + goto invalid_cmd; > + > + if ((payload->in.buf != NULL && payload->in.size == 0) || > + (payload->out.buf != NULL && payload->out.size == 0)) > + goto invalid_cmd; > + > + in_size = PMU_CMD_HDR_SIZE; > + if (payload->in.buf) { > + in_size += payload->in.offset; > + in_size += sizeof(struct 
pmu_allocation_gk20a); > + } > + > + out_size = PMU_CMD_HDR_SIZE; > + if (payload->out.buf) { > + out_size += payload->out.offset; > + out_size += sizeof(struct pmu_allocation_gk20a); > + } > + > + if (in_size > cmd->hdr.size || out_size > cmd->hdr.size) > + goto invalid_cmd; > + > + > + if ((payload->in.offset != 0 && payload->in.buf == NULL) || > + (payload->out.offset != 0 && payload->out.buf == NULL)) > + goto invalid_cmd; > + > + return true; > + > +invalid_cmd: > + nv_error(ppmu, "invalid pmu cmd :\n" > + "queue_id=%d,\n" > + "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n" > + "payload in=%p, in_size=%d, in_offset=%d,\n" > + "payload out=%p, out_size=%d, out_offset=%d", > + queue_id, cmd->hdr.size, cmd->hdr.unit_id, > + msg, msg ? msg->hdr.unit_id : ~0, > + &payload->in, payload->in.size, payload->in.offset, > + &payload->out, payload->out.size, payload->out.offset); > + > + return false; > +} > + > +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd, > + u32 queue_id, unsigned long timeout) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_queue *queue; > + unsigned long end_jiffies = jiffies + > + msecs_to_jiffies(timeout); > + int err; > + > + nv_debug(ppmu, "pmu write cmd\n"); > + > + queue = &pmu->queue[queue_id]; > + > + do { > + err = pmu_queue_open_write(pmu, queue, cmd->hdr.size); > + if (err == -EAGAIN && time_before(jiffies, end_jiffies)) > + usleep_range(1000, 2000); > + else > + break; > + } while (1); > + > + if (err) > + goto clean_up; > + > + pmu_queue_push(pmu, queue, cmd, cmd->hdr.size); > + > + err = pmu_queue_close(pmu, queue, true); > + > +clean_up: > + if (err) > + nv_error(ppmu, > + "fail to write cmd to queue %d", queue_id); > + else > + nv_debug(ppmu, "cmd writing done"); > + > + return err; > +} > + > +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd, > + struct pmu_msg *msg, struct pmu_payload *payload, > + u32 queue_id, pmu_callback callback, 
void *cb_param, > + u32 *seq_desc, unsigned long timeout) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_sequence *seq; > + struct pmu_allocation_gk20a *in = NULL, *out = NULL; > + int err; > + > + BUG_ON(!cmd); > + BUG_ON(!seq_desc); > + BUG_ON(!pmu->pmu_ready); > + nv_debug(ppmu, "Post CMD\n"); > + if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id)) > + return -EINVAL; > + > + err = pmu_seq_acquire(pmu, &seq); > + if (err) > + return err; > + > + cmd->hdr.seq_id = seq->id; > + > + cmd->hdr.ctrl_flags = 0; > + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; > + cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; > + > + seq->callback = callback; > + seq->cb_params = cb_param; > + seq->msg = msg; > + seq->out_payload = NULL; > + seq->desc = pmu->next_seq_desc++; > + > + if (payload) > + seq->out_payload = payload->out.buf; > + > + *seq_desc = seq->desc; > + > + if (payload && payload->in.offset != 0) { > + in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + > + payload->in.offset); > + > + if (payload->in.buf != payload->out.buf) > + in->alloc.dmem.size = (u16)payload->in.size; > + else > + in->alloc.dmem.size = > + (u16)max(payload->in.size, payload->out.size); > + > + err = pmu->dmem.alloc(&pmu->dmem, > + (void *)&in->alloc.dmem.offset, > + in->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (err) > + goto clean_up; > + > + pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset), > + payload->in.buf, payload->in.size, 0); > + seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size; > + seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset; > + } > + > + if (payload && payload->out.offset != 0) { > + out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd + > + payload->out.offset); > + out->alloc.dmem.size = (u16)payload->out.size; > + > + if (payload->out.buf != payload->in.buf) { > + err = pmu->dmem.alloc(&pmu->dmem, > + (void *)&out->alloc.dmem.offset, > + out->alloc.dmem.size, > + 
PMU_DMEM_ALLOC_ALIGNMENT); > + if (err) > + goto clean_up; > + } else { > + BUG_ON(in == NULL); > + out->alloc.dmem.offset = in->alloc.dmem.offset; > + } > + > + seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size; > + seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset; > + } > + > + seq->state = PMU_SEQ_STATE_USED; > + err = pmu_write_cmd(pmu, cmd, queue_id, timeout); > + if (err) > + seq->state = PMU_SEQ_STATE_PENDING; > + > + nv_debug(ppmu, "cmd posted\n"); > + > + return 0; > + > +clean_up: > + nv_debug(ppmu, "cmd post failed\n"); > + if (in) > + pmu->dmem.free(&pmu->dmem, > + in->alloc.dmem.offset, > + in->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (out) > + pmu->dmem.free(&pmu->dmem, > + out->alloc.dmem.offset, > + out->alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + > + pmu_seq_release(pmu, seq); > + return err; > +} > + > +void gk20a_pmu_isr(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct nvkm_mc *pmc = nvkm_mc(ppmu); > + struct pmu_queue *queue; > + u32 intr, mask; > + bool recheck = false; > + if (!pmu->isr_enabled) > + goto out; > + > + mask = nv_rd32(ppmu, 0x0010a018) & > + nv_rd32(ppmu, 0x0010a01c); > + > + intr = nv_rd32(ppmu, 0x0010a008) & mask; > + > + nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr); > + pmu_enable_irq(ppmu, pmc, false); > + if (!intr || pmu->pmu_state == PMU_STATE_OFF) { > + nv_wr32(ppmu, 0x0010a004, intr); > + nv_error(ppmu, "pmu state off\n"); > + pmu_enable_irq(ppmu, pmc, true); > + goto out; > + } > + if (intr & 0x10) { > + nv_error(ppmu, > + "pmu halt intr not implemented"); > + pmu_dump_falcon_stats(pmu); > + } > + if (intr & 0x20) { > + nv_error(ppmu, > + "pmu exterr intr not implemented. 
Clearing interrupt."); > + pmu_dump_falcon_stats(pmu); > + > + nv_wr32(ppmu, 0x0010a16c, > + nv_rd32(ppmu, 0x0010a16c) & > + ~(0x1 << 31)); > + } > + if (intr & 0x40) { > + nv_debug(ppmu, "scheduling work\n"); > + schedule_work(&pmu->isr_workq); > + pmu_enable_irq(ppmu, pmc, true); > + recheck = true; > + } > + > + if (recheck) { > + queue = &pmu->queue[PMU_MESSAGE_QUEUE]; > + if (!pmu_queue_is_empty(pmu, queue)) > + nv_wr32(ppmu, 0x0010a000, 0x40); > + } else { > + pmu_enable_irq(ppmu, pmc, true); > + } > + > + pmu_enable_irq(ppmu, pmc, true); > + nv_wr32(ppmu, 0x0010a004, intr); > +out: > + nv_debug(ppmu, "irq handled\n"); > +} > + > +static int > +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw) > +{ > + int ret = 0; > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u32 *ucode_image; > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + int i; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + struct nvkm_device *device = nv_device(&ppmu->base); > + struct nvkm_vm *vm; > + u64 pmu_area_len = 300*1024; > + > + ppmu->pmuvm = &pmuvm; > + ppmu->pg_buf = &pmu->pg_buf; > + pmu->pmu = ppmu; > + /* mem for inst blk*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0, > + &ppmuvm->mem); > + if (ret) > + goto instblk_alloc_err; > + > + /* mem for pgd*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0, > + &ppmuvm->pgd); > + if (ret) > + goto pgd_alloc_err; > + > + /*allocate virtual memory range*/ > + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); > + if (ret) > + goto virt_alloc_err; > + > + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); > + /*update VM with pgd */ > + > + ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd); > + if (ret) > + goto virt_alloc_err; > + > + /*update pgd in inst blk */ > + nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr)); > + nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr)); > + nv_wo32(ppmuvm->mem, 0x0208, 
lower_32_bits(pmu_area_len - 1)); > + nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); > + > + /* allocate memory for pmu fw to be copied to*/ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, > + GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj); > + if (ret) > + goto fw_alloc_err; > + > + ucode_image = (u32 *)((u32)desc + desc->descriptor_size); > + for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) { > + nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]); > + pr_info("writing 0x%08x\n", ucode_image[i]); > + } > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm, > + NV_MEM_ACCESS_RW, > + &pmu->ucode.pmubufvma); > + if (ret) > + goto map_err; > + > + nv_debug(ppmu, "%s function end\n", __func__); > + return ret; > +map_err: > + nvkm_gpuobj_destroy(pmu->ucode.pmubufobj); > +virt_alloc_err: > +fw_alloc_err: > + nvkm_gpuobj_destroy(ppmuvm->pgd); > +pgd_alloc_err: > + nvkm_gpuobj_destroy(ppmuvm->mem); > +instblk_alloc_err: > + return ret; > + > +} > + > +static int > +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw) > +{ > + struct nvkm_device *dev; > + char name[32]; > + > + dev = nv_device(ppmu); > + > + snprintf(name, sizeof(name), "nvidia/tegra124/%s", > + GK20A_PMU_UCODE_IMAGE); > + > + return request_firmware(pfw, name, nv_device_base(dev)); > +} > + > +static void > +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, > + const struct firmware *fw) > +{ > + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; > + > + nv_debug(ppmu, "GK20A PMU firmware information\n"); > + nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size); > + nv_debug(ppmu, "image size = %u\n", desc->image_size); > + nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version); > + nv_debug(ppmu, "date = %s\n", desc->date); > + nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n", > + desc->bootloader_start_offset); > + nv_debug(ppmu, 
"bootloader_size = 0x%08x\n", desc->bootloader_size); > + nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n", > + desc->bootloader_imem_offset); > + nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n", > + desc->bootloader_entry_point); > + nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); > + nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size); > + nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); > + nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); > + nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); > + nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n", > + desc->app_resident_code_offset); > + nv_debug(ppmu, "app_resident_code_size = 0x%08x\n", > + desc->app_resident_code_size); > + nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n", > + desc->app_resident_data_offset); > + nv_debug(ppmu, "app_resident_data_size = 0x%08x\n", > + desc->app_resident_data_size); > + nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays); > + > + nv_debug(ppmu, "compressed = %u\n", desc->compressed); > +} > + > +static int pmu_process_init_msg(struct pmu_desc *pmu, > + struct pmu_msg *msg) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_init_msg_pmu_gk20a *init; > + struct pmu_sha1_gid_data gid_data; > + u32 i, tail = 0; > + > + tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff; > + > + pmu_copy_from_dmem(pmu, tail, > + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); > + > + if (msg->hdr.unit_id != PMU_UNIT_INIT) { > + nv_error(ppmu, > + "expecting init msg"); > + return -EINVAL; > + } > + > + pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, > + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); > + > + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { > + nv_error(ppmu, > + "expecting init msg"); > + return -EINVAL; > + } > + > + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); > + nv_wr32(ppmu, 0x0010a4cc, > + tail & 0xffffffff); > + > + init = 
&msg->msg.init.pmu_init_gk20a; > + if (!pmu->gid_info.valid) { > + > + pmu_copy_from_dmem(pmu, > + init->sw_managed_area_offset, > + (u8 *)&gid_data, > + sizeof(struct pmu_sha1_gid_data), 0); > + > + pmu->gid_info.valid = > + (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE); > + > + if (pmu->gid_info.valid) { > + > + BUG_ON(sizeof(pmu->gid_info.gid) != > + sizeof(gid_data.gid)); > + > + memcpy(pmu->gid_info.gid, gid_data.gid, > + sizeof(pmu->gid_info.gid)); > + } > + } > + > + for (i = 0; i < PMU_QUEUE_COUNT; i++) > + pmu_queue_init(pmu, i, init); > + > + if (!pmu->dmem.alloc) > + nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", > + init->sw_managed_area_offset, > + init->sw_managed_area_size); > + > + pmu->pmu_ready = true; > + pmu->pmu_state = PMU_STATE_INIT_RECEIVED; > + > + return 0; > +} > + > +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue, > + struct pmu_msg *msg, int *status) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + u32 read_size, bytes_read; > + int err; > + > + *status = 0; > + > + if (pmu_queue_is_empty(pmu, queue)) > + return false; > + > + err = pmu_queue_open_read(pmu, queue); > + if (err) { > + nv_error(ppmu, > + "fail to open queue %d for read", queue->id); > + *status = err; > + return false; > + } > + > + err = pmu_queue_pop(pmu, queue, &msg->hdr, > + PMU_MSG_HDR_SIZE, &bytes_read); > + if (err || bytes_read != PMU_MSG_HDR_SIZE) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err | -EINVAL; > + goto clean_up; > + } > + > + if (msg->hdr.unit_id == PMU_UNIT_REWIND) { > + pmu_queue_rewind(pmu, queue); > + /* read again after rewind */ > + err = pmu_queue_pop(pmu, queue, &msg->hdr, > + PMU_MSG_HDR_SIZE, &bytes_read); > + if (err || bytes_read != PMU_MSG_HDR_SIZE) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err | -EINVAL; > + goto clean_up; > + } > + } > + > + if 
(!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) { > + nv_error(ppmu, > + "read invalid unit_id %d from queue %d", > + msg->hdr.unit_id, queue->id); > + *status = -EINVAL; > + goto clean_up; > + } > + > + if (msg->hdr.size > PMU_MSG_HDR_SIZE) { > + read_size = msg->hdr.size - PMU_MSG_HDR_SIZE; > + err = pmu_queue_pop(pmu, queue, &msg->msg, > + read_size, &bytes_read); > + if (err || bytes_read != read_size) { > + nv_error(ppmu, > + "fail to read msg from queue %d", queue->id); > + *status = err; > + goto clean_up; > + } > + } > + > + err = pmu_queue_close(pmu, queue, true); > + if (err) { > + nv_error(ppmu, > + "fail to close queue %d", queue->id); > + *status = err; > + return false; > + } > + > + return true; > + > +clean_up: > + err = pmu_queue_close(pmu, queue, false); > + if (err) > + nv_error(ppmu, > + "fail to close queue %d", queue->id); > + return false; > +} > + > +static int pmu_response_handle(struct pmu_desc *pmu, > + struct pmu_msg *msg) > +{ > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct pmu_sequence *seq; > + int ret = 0; > + > + nv_debug(ppmu, "handling pmu response\n"); > + seq = &pmu->seq[msg->hdr.seq_id]; > + if (seq->state != PMU_SEQ_STATE_USED && > + seq->state != PMU_SEQ_STATE_CANCELLED) { > + nv_error(ppmu, > + "msg for an unknown sequence %d", seq->id); > + return -EINVAL; > + } > + > + if (msg->hdr.unit_id == PMU_UNIT_RC && > + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { > + nv_error(ppmu, > + "unhandled cmd: seq %d", seq->id); > + } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { > + if (seq->msg) { > + if (seq->msg->hdr.size >= msg->hdr.size) { > + memcpy(seq->msg, msg, msg->hdr.size); > + if (seq->out_gk20a.alloc.dmem.size != 0) { > + pmu_copy_from_dmem(pmu, > + seq->out_gk20a.alloc.dmem.offset, > + seq->out_payload, > + seq->out_gk20a.alloc.dmem.size, 0); > + } > + } else { > + nv_error(ppmu, > + "sequence %d msg buffer too small", > + seq->id); > + } > + } > + } else > + seq->callback 
= NULL; > + if (seq->in_gk20a.alloc.dmem.size != 0) > + pmu->dmem.free(&pmu->dmem, > + seq->in_gk20a.alloc.dmem.offset, > + seq->in_gk20a.alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + if (seq->out_gk20a.alloc.dmem.size != 0) > + pmu->dmem.free(&pmu->dmem, > + seq->out_gk20a.alloc.dmem.offset, > + seq->out_gk20a.alloc.dmem.size, > + PMU_DMEM_ALLOC_ALIGNMENT); > + > + if (seq->callback) > + seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret); > + > + pmu_seq_release(pmu, seq); > + > + /* TBD: notify client waiting for available dmem */ > + nv_debug(ppmu, "pmu response processed\n"); > + > + return 0; > +} > + > +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout, > + u32 *var, u32 val); > + > + > +static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg) > +{ > + int err = 0; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + > + switch (msg->hdr.unit_id) { > + case PMU_UNIT_PERFMON: > + nv_debug(ppmu, "init perfmon event generated\n"); > + break; > + default: > + nv_debug(ppmu, "default event generated\n"); > + break; > + } > + > + return err; > +} > + > +void pmu_process_message(struct work_struct *work) > +{ > + struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq); > + struct pmu_msg msg; > + int status; > + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *) > + impl_from_pmu(pmu)); > + struct nvkm_mc *pmc = nvkm_mc(ppmu); > + > + mutex_lock(&pmu->isr_mutex); > + if (unlikely(!pmu->pmu_ready)) { > + nv_debug(ppmu, "processing init msg\n"); > + pmu_process_init_msg(pmu, &msg); > + mutex_unlock(&pmu->isr_mutex); > + pmu_enable_irq(ppmu, pmc, true); > + goto out; > + } > + > + while (pmu_read_message(pmu, > + &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) { > + > + nv_debug(ppmu, "read msg hdr:\n" > + "unit_id = 0x%08x, size = 0x%08x,\n" > + "ctrl_flags = 0x%08x, seq_id = 0x%08x\n", > + msg.hdr.unit_id, msg.hdr.size, > + msg.hdr.ctrl_flags, msg.hdr.seq_id); > + > + msg.hdr.ctrl_flags &= 
~PMU_CMD_FLAGS_PMU_MASK; > + > + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT) > + pmu_handle_event(pmu, &msg); > + else > + pmu_response_handle(pmu, &msg); > + } > + mutex_unlock(&pmu->isr_mutex); > + pmu_enable_irq(ppmu, pmc, true); > +out: > + nv_debug(ppmu, "exit %s\n", __func__); > +} > + > +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + > + /* make sure the pending operations are finished before we continue */ > + cancel_work_sync(&pmu->isr_workq); > + pmu->initialized = false; > + > + mutex_lock(&pmu->isr_mutex); > + pmu_enable(ppmu, pmc, false); > + pmu->isr_enabled = false; > + mutex_unlock(&pmu->isr_mutex); > + > + pmu->pmu_state = PMU_STATE_OFF; > + pmu->pmu_ready = false; > + pmu->zbc_ready = false; > + > + return 0; > +} > + > +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + *load = pmu->load_shadow; > + return 0; > +} > + > +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + u16 _load = 0; > + > + pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); > + pmu->load_shadow = _load / 10; > + pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); > + > + return 0; > +} > + > +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles, > + u32 *total_cycles) > +{ > + /*todo if (!g->power_on || gk20a_busy(g->dev)) { > + *busy_cycles = 0; > + *total_cycles = 0; > + return; > + }*/ > + > + *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff; > + /*todormb();*/ > + *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff; > + /*todogk20a_idle(g->dev);*/ > +} > + > +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu) > +{ > + u32 reg_val = 1 << 31; > + > + 
/*todoif (!g->power_on || gk20a_busy(g->dev)) > + return;*/ > + > + nv_wr32(ppmu, 0x0010a508 + 32, reg_val); > + /*todowmb()*/; > + nv_wr32(ppmu, 0x0010a508 + 16, reg_val); > + /*todogk20a_idle(g->dev);*/ > +} > + > +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + int err; > + > + mutex_lock(&pmu->isr_mutex); > + pmu_reset(ppmu, pmc); > + pmu->isr_enabled = true; > + mutex_unlock(&pmu->isr_mutex); > + > + /* setup apertures - virtual */ > + nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0); > + nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0); > + /* setup apertures - physical */ > + nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0); > + nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1); > + nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2); > + > + /* TBD: load pmu ucode */ > + err = pmu_bootstrap(pmu); > + if (err) > + return err; > + > + return 0; > + > +} > + > +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu) > +{ > + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu); > + struct pmu_desc *pmu = &impl->pmudata; > + struct pmu_priv_vm *ppmuvm = &pmuvm; > + int i, err = 0; > + int ret = 0; > + > + > + if (pmu->sw_ready) { > + > + for (i = 0; i < pmu->mutex_cnt; i++) { > + pmu->mutex[i].id = i; > + pmu->mutex[i].index = i; > + } > + pmu_seq_init(pmu); > + > + nv_debug(ppmu, "skipping init\n"); > + goto skip_init; > + } > + > + /* no infoRom script from vbios? 
*/ > + > + /* TBD: sysmon subtask */ > + > + pmu->mutex_cnt = 0x00000010; > + pmu->mutex = kzalloc(pmu->mutex_cnt * > + sizeof(struct pmu_mutex), GFP_KERNEL); > + if (!pmu->mutex) { > + err = -ENOMEM; > + nv_error(ppmu, "not enough space ENOMEM\n"); > + goto err; > + } > + > + for (i = 0; i < pmu->mutex_cnt; i++) { > + pmu->mutex[i].id = i; > + pmu->mutex[i].index = i; > + } > + > + pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES * > + sizeof(struct pmu_sequence), GFP_KERNEL); > + if (!pmu->seq) { > + err = -ENOMEM; > + nv_error(ppmu, "not enough space ENOMEM\n"); > + goto err_free_mutex; > + } > + > + pmu_seq_init(pmu); > + > + INIT_WORK(&pmu->isr_workq, pmu_process_message); > + init_waitqueue_head(&ppmu->init_wq); > + ppmu->gr_initialised = false; > + > + /* allocate memory for pmu fw area */ > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE, > + 0x1000, 0, &pmu->seq_buf.pmubufobj); > + if (ret) > + return ret; > + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE, > + 0, 0, &pmu->trace_buf.pmubufobj); > + if (ret) > + return ret; > + /* map allocated memory into GMMU */ > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj), > + ppmuvm->vm, > + NV_MEM_ACCESS_RW, > + &pmu->seq_buf.pmubufvma); > + if (ret) > + return ret; > + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj), > + ppmuvm->vm, > + NV_MEM_ACCESS_RW, > + &pmu->trace_buf.pmubufvma); > + if (ret) > + return ret; > + > + /* TBD: remove this if ZBC save/restore is handled by PMU > + * end an empty ZBC sequence for now */ > + nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16); > + nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01); > + nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00); > + nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00); > + > + pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE; > + ret = 
gk20a_pmu_debugfs_init(ppmu); > + if (ret) > + return ret; > + > + pmu->sw_ready = true; > + > +skip_init: > + return 0; > +err_free_mutex: > + kfree(pmu->mutex); > +err: > + return err; > +} > + > +static void > +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable) > +{ > + /* > + nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000); > + nv_rd32(ppmu, 0x000200); > + nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000); > + > + msleep(50); > + > + nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000); > + nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000); > + nv_rd32(ppmu, 0x000200); > + */ > +} > + > +static void gk20a_pmu_intr(struct nvkm_subdev *subdev) > +{ > + struct nvkm_pmu *ppmu = nvkm_pmu(subdev); > + > + gk20a_pmu_isr(ppmu); > +} > + > +void gk20a_remove_pmu_support(struct pmu_desc *pmu) > +{ > + nvkm_pmu_allocator_destroy(&pmu->dmem); > +} > + > +int gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2], > + u32 process, u32 message, u32 data0, u32 data1) > +{ > + return -EPERM; > +} > + > +int > +gk20a_pmu_create_(struct nvkm_object *parent, > + struct nvkm_object *engine, > + struct nvkm_oclass *oclass, int length, void **pobject) > +{ > + struct nvkm_pmu *ppmu; > + struct nvkm_device *device = nv_device(parent); > + int ret; > + > + ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU", > + "pmu", length, pobject); > + ppmu = *pobject; > + if (ret) > + return ret; > + > + ret = nv_device_get_irq(device, true); > + > + ppmu->message = gk20a_message; > + ppmu->pgob = gk20a_pmu_pgob; > + ppmu->pmu_mutex_acquire = pmu_mutex_acquire; > + ppmu->pmu_mutex_release = pmu_mutex_release; > + ppmu->pmu_load_norm = gk20a_pmu_load_norm; > + ppmu->pmu_load_update = gk20a_pmu_load_update; > + ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters; > + ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters; > + > + return 0; > +} > + > + > + > diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h > new file mode 100644 > index 
000000000000..a084d6d518b4 > --- /dev/null > +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h > @@ -0,0 +1,369 @@ > +#ifndef __NVKM_pmu_GK20A_H__ > +#define __NVKM_pmu_GK20A_H__ > + > +/* > + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. 
> + */ > +void pmu_setup_hw(struct pmu_desc *pmu); > +void gk20a_remove_pmu_support(struct pmu_desc *pmu); > +#define gk20a_pmu_create(p, e, o, d) \ > + gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) > + > +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *, > + struct nvkm_oclass *, int, void **); > +/* defined by pmu hw spec */ > +#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024) > +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) > +#define GK20A_PMU_SEQ_BUF_SIZE 4096 > +/* idle timeout */ > +#define GK20A_IDLE_CHECK_DEFAULT 100 /* usec */ > +#define GK20A_IDLE_CHECK_MAX 5000 /* usec */ > + > +/* so far gk20a has two engines: gr and ce2(gr_copy) */ > +enum { > + ENGINE_GR_GK20A = 0, > + ENGINE_CE2_GK20A = 1, > + ENGINE_INVAL_GK20A > +}; > + > +#define ZBC_MASK(i) (~(~(0) << ((i)+1)) & 0xfffe) > + > +#define APP_VERSION_GK20A 17997577 > + > +enum { > + GK20A_PMU_DMAIDX_UCODE = 0, > + GK20A_PMU_DMAIDX_VIRT = 1, > + GK20A_PMU_DMAIDX_PHYS_VID = 2, > + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, > + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + GK20A_PMU_DMAIDX_RSVD = 5, > + GK20A_PMU_DMAIDX_PELPG = 6, > + GK20A_PMU_DMAIDX_END = 7 > +}; > + > +struct pmu_mem_gk20a { > + u32 dma_base; > + u8 dma_offset; > + u8 dma_idx; > + u16 fb_size; > +}; > + > +struct pmu_dmem { > + u16 size; > + u32 offset; > +}; > + > +struct pmu_cmdline_args_gk20a { > + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ > + u32 falc_trace_size; /* falctrace buffer size (bytes) */ > + u32 falc_trace_dma_base; /* 256-byte block address */ > + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ > + u8 secure_mode; > + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ > +}; > + > +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 16K */ > +#define GK20A_PMU_DMEM_BLKSIZE2 8 > + > +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 > +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 > + > +struct pmu_ucode_desc { > + u32 descriptor_size; > + u32 image_size; > + u32 tools_version; > + u32 
app_version; > + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; > + u32 bootloader_start_offset; > + u32 bootloader_size; > + u32 bootloader_imem_offset; > + u32 bootloader_entry_point; > + u32 app_start_offset; > + u32 app_size; > + u32 app_imem_offset; > + u32 app_imem_entry; > + u32 app_dmem_offset; > + u32 app_resident_code_offset; /* Offset from appStartOffset */ > +/* Exact size of the resident code > + * ( potentially contains CRC inside at the end ) */ > + u32 app_resident_code_size; > + u32 app_resident_data_offset; /* Offset from appStartOffset */ > +/* Exact size of the resident data > + * ( potentially contains CRC inside at the end ) */ > + u32 app_resident_data_size; > + u32 nb_overlays; > + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; > + u32 compressed; > +}; > + > +#define PMU_UNIT_REWIND (0x00) > +#define PMU_UNIT_PG (0x03) > +#define PMU_UNIT_INIT (0x07) > +#define PMU_UNIT_PERFMON (0x12) > +#define PMU_UNIT_THERM (0x1B) > +#define PMU_UNIT_RC (0x1F) > +#define PMU_UNIT_NULL (0x20) > +#define PMU_UNIT_END (0x23) > + > +#define PMU_UNIT_TEST_START (0xFE) > +#define PMU_UNIT_END_SIM (0xFF) > +#define PMU_UNIT_TEST_END (0xFF) > + > +#define PMU_UNIT_ID_IS_VALID(id) \ > + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) > + > +#define PMU_DMEM_ALLOC_ALIGNMENT (32) > +#define PMU_DMEM_ALIGNMENT (4) > + > +#define PMU_CMD_FLAGS_PMU_MASK (0xF0) > + > +#define PMU_CMD_FLAGS_STATUS BIT(0) > +#define PMU_CMD_FLAGS_INTR BIT(1) > +#define PMU_CMD_FLAGS_EVENT BIT(2) > +#define PMU_CMD_FLAGS_WATERMARK BIT(3) > + > +struct pmu_hdr { > + u8 unit_id; > + u8 size; > + u8 ctrl_flags; > + u8 seq_id; > +}; > +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) > +#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr) > + > + > +struct pmu_allocation_gk20a { > + struct { > + struct pmu_dmem dmem; > + struct pmu_mem_gk20a fb; > + } alloc; > +}; > + > +enum { > + PMU_INIT_MSG_TYPE_PMU_INIT = 0, > +}; > + > +struct pmu_init_msg_pmu_gk20a { > + u8 
msg_type; > + u8 pad; > + u16 os_debug_entry_point; > + > + struct { > + u16 size; > + u16 offset; > + u8 index; > + u8 pad; > + } queue_info[PMU_QUEUE_COUNT]; > + > + u16 sw_managed_area_offset; > + u16 sw_managed_area_size; > +}; > + > +struct pmu_init_msg { > + union { > + u8 msg_type; > + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; > + }; > +}; > + > + > +enum { > + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, > +}; > + > +struct pmu_rc_msg_unhandled_cmd { > + u8 msg_type; > + u8 unit_id; > +}; > + > +struct pmu_rc_msg { > + u8 msg_type; > + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; > +}; > + > +/* PERFMON */ > +#define PMU_DOMAIN_GROUP_PSTATE 0 > +#define PMU_DOMAIN_GROUP_GPC2CLK 1 > +#define PMU_DOMAIN_GROUP_NUM 2 > +struct pmu_perfmon_counter_gk20a { > + u8 index; > + u8 flags; > + u8 group_id; > + u8 valid; > + u16 upper_threshold; /* units of 0.01% */ > + u16 lower_threshold; /* units of 0.01% */ > +}; > +struct pmu_zbc_cmd { > + u8 cmd_type; > + u8 pad; > + u16 entry_mask; > +}; > + > +/* PERFMON MSG */ > +enum { > + PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0, > + PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1, > + PMU_PERFMON_MSG_ID_INIT_EVENT = 2, > + PMU_PERFMON_MSG_ID_ACK = 3 > +}; > + > +struct pmu_perfmon_msg_generic { > + u8 msg_type; > + u8 state_id; > + u8 group_id; > + u8 data; > +}; > + > +struct pmu_perfmon_msg { > + union { > + u8 msg_type; > + struct pmu_perfmon_msg_generic gen; > + }; > +}; > + > + > +struct pmu_cmd { > + struct pmu_hdr hdr; > + union { > + struct pmu_zbc_cmd zbc; > + } cmd; > +}; > + > +struct pmu_msg { > + struct pmu_hdr hdr; > + union { > + struct pmu_init_msg init; > + struct pmu_perfmon_msg perfmon; > + struct pmu_rc_msg rc; > + } msg; > +}; > + > +/* write by sw, read by pmu, protected by sw mutex lock */ > +#define PMU_COMMAND_QUEUE_HPQ 0 > +/* write by sw, read by pmu, protected by sw mutex lock */ > +#define PMU_COMMAND_QUEUE_LPQ 1 > +/* write by pmu, read by sw, accessed by interrupt handler, no lock */ > +#define 
PMU_MESSAGE_QUEUE 4 > +#define PMU_QUEUE_COUNT 5 > + > +enum { > + PMU_MUTEX_ID_RSVD1 = 0, > + PMU_MUTEX_ID_GPUSER, > + PMU_MUTEX_ID_GPMUTEX, > + PMU_MUTEX_ID_I2C, > + PMU_MUTEX_ID_RMLOCK, > + PMU_MUTEX_ID_MSGBOX, > + PMU_MUTEX_ID_FIFO, > + PMU_MUTEX_ID_PG, > + PMU_MUTEX_ID_GR, > + PMU_MUTEX_ID_CLK, > + PMU_MUTEX_ID_RSVD6, > + PMU_MUTEX_ID_RSVD7, > + PMU_MUTEX_ID_RSVD8, > + PMU_MUTEX_ID_RSVD9, > + PMU_MUTEX_ID_INVALID > +}; > + > +#define PMU_IS_COMMAND_QUEUE(id) \ > + ((id) < PMU_MESSAGE_QUEUE) > + > +#define PMU_IS_SW_COMMAND_QUEUE(id) \ > + (((id) == PMU_COMMAND_QUEUE_HPQ) || \ > + ((id) == PMU_COMMAND_QUEUE_LPQ)) > + > +#define PMU_IS_MESSAGE_QUEUE(id) \ > + ((id) == PMU_MESSAGE_QUEUE) > + > +enum { > + OFLAG_READ = 0, > + OFLAG_WRITE > +}; > + > +#define QUEUE_SET (true) > + /*todo find how to get cpu_pa*/ > +#define QUEUE_GET (false) > + > +#define QUEUE_ALIGNMENT (4) > + > +#define PMU_PGENG_GR_BUFFER_IDX_INIT (0) > +#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1) > +#define PMU_PGENG_GR_BUFFER_IDX_FECS (2) > + > +enum { > + PMU_DMAIDX_UCODE = 0, > + PMU_DMAIDX_VIRT = 1, > + PMU_DMAIDX_PHYS_VID = 2, > + PMU_DMAIDX_PHYS_SYS_COH = 3, > + PMU_DMAIDX_PHYS_SYS_NCOH = 4, > + PMU_DMAIDX_RSVD = 5, > + PMU_DMAIDX_PELPG = 6, > + PMU_DMAIDX_END = 7 > +}; > + > +#define PMU_MUTEX_ID_IS_VALID(id) \ > + ((id) < PMU_MUTEX_ID_INVALID) > + > +#define PMU_INVALID_MUTEX_OWNER_ID (0) > + > +struct pmu_mutex { > + u32 id; > + u32 index; > + u32 ref_cnt; > +}; > + > + > +#define PMU_INVALID_SEQ_DESC (~0) > + > +enum { > + PMU_SEQ_STATE_FREE = 0, > + PMU_SEQ_STATE_PENDING, > + PMU_SEQ_STATE_USED, > + PMU_SEQ_STATE_CANCELLED > +}; > + > +struct pmu_payload { > + struct { > + void *buf; > + u32 offset; > + u32 size; > + } in, out; > +}; > + > +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *, > +u32, u32); > + > +struct pmu_sequence { > + u8 id; > + u32 state; > + u32 desc; > + struct pmu_msg *msg; > + struct pmu_allocation_gk20a in_gk20a; > + struct 
pmu_allocation_gk20a out_gk20a; > + u8 *out_payload; > + pmu_callback callback; > + void *cb_params; > +}; > +struct pmu_gk20a_data { > + struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a; > + u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; > +}; > + > +#endif /*_GK20A_H__*/ > diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h > index 998410563bfd..c4686e418582 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/priv.h > +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h > @@ -2,7 +2,91 @@ > #define __NVKM_PMU_PRIV_H__ > #include <subdev/pmu.h> > #include <subdev/pmu/fuc/os.h> > +#include <core/object.h> > +#include <core/device.h> > +#include <core/parent.h> > +#include <core/mm.h> > +#include <linux/rwsem.h> > +#include <linux/slab.h> > +#include <subdev/mmu.h> > +#include <core/gpuobj.h> > > +static inline u32 u64_hi32(u64 n) > +{ > + return (u32)((n >> 32) & ~(u32)0); > +} > + > +static inline u32 u64_lo32(u64 n) > +{ > + return (u32)(n & ~(u32)0); > +} > + > +/* #define ALLOCATOR_DEBUG */ > + > +/* main struct */ > +struct nvkm_pmu_allocator { > + > + char name[32]; /* name for allocator */ > +/*struct rb_root rb_root;*/ /* rb tree root for blocks */ > + > + u32 base; /* min value of this linear space */ > + u32 limit; /* max value = limit - 1 */ > + > + unsigned long *bitmap; /* bitmap */ > + > + struct gk20a_alloc_block *block_first; /* first block in list */ > + struct gk20a_alloc_block *block_recent; /* last visited block */ > + > + u32 first_free_addr; /* first free addr, non-contigous > + allocation preferred start, > + in order to pick up small holes */ > + u32 last_free_addr; /* last free addr, contiguous > + allocation preferred start */ > + u32 cached_hole_size; /* max free hole size up to > + last_free_addr */ > + u32 block_count; /* number of blocks */ > + > + struct rw_semaphore rw_sema; /* lock */ > + struct kmem_cache *block_cache; /* slab cache */ > + > + /* if enabled, constrain to [base, limit) */ > + struct { > + bool enable; 
> + u32 base; > + u32 limit; > + } constraint; > + > + int (*alloc)(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + int (*free)(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); > + > +}; > + > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 base, u32 size); > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator); > + > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); > + > +#if defined(ALLOCATOR_DEBUG) > + > +#define allocator_dbg(alloctor, format, arg...) \ > +do { \ > + if (1) \ > + pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\ > + alloctor->name, __func__, ##arg);\ > +} while (0) > + > +#else /* ALLOCATOR_DEBUG */ > + > +#define allocator_dbg(format, arg...) > + > +#endif /* ALLOCATOR_DEBUG */ > #define nvkm_pmu_create(p, e, o, d) \ > nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) > #define nvkm_pmu_destroy(p) \ > @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *, > int _nvkm_pmu_init(struct nvkm_object *); > int _nvkm_pmu_fini(struct nvkm_object *, bool); > void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable); > +#define PMU_PG_IDLE_THRESHOLD 15000 > +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 > + > +/* state transition : > + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF > + ON => OFF is always synchronized */ > +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */ > +#define PMU_ELPG_STAT_ON 1 /* elpg is on */ > +/* elpg is off, ALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_ON_PENDING 2 > +/* elpg is on, DISALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_OFF_PENDING 3 > +/* elpg is off, caller has requested on, but ALLOW > +cmd hasn't been sent due to ENABLE_ALLOW delay 
*/ > +#define PMU_ELPG_STAT_OFF_ON_PENDING 4 > + > +/* Falcon Register index */ > +#define PMU_FALCON_REG_R0 (0) > +#define PMU_FALCON_REG_R1 (1) > +#define PMU_FALCON_REG_R2 (2) > +#define PMU_FALCON_REG_R3 (3) > +#define PMU_FALCON_REG_R4 (4) > +#define PMU_FALCON_REG_R5 (5) > +#define PMU_FALCON_REG_R6 (6) > +#define PMU_FALCON_REG_R7 (7) > +#define PMU_FALCON_REG_R8 (8) > +#define PMU_FALCON_REG_R9 (9) > +#define PMU_FALCON_REG_R10 (10) > +#define PMU_FALCON_REG_R11 (11) > +#define PMU_FALCON_REG_R12 (12) > +#define PMU_FALCON_REG_R13 (13) > +#define PMU_FALCON_REG_R14 (14) > +#define PMU_FALCON_REG_R15 (15) > +#define PMU_FALCON_REG_IV0 (16) > +#define PMU_FALCON_REG_IV1 (17) > +#define PMU_FALCON_REG_UNDEFINED (18) > +#define PMU_FALCON_REG_EV (19) > +#define PMU_FALCON_REG_SP (20) > +#define PMU_FALCON_REG_PC (21) > +#define PMU_FALCON_REG_IMB (22) > +#define PMU_FALCON_REG_DMB (23) > +#define PMU_FALCON_REG_CSW (24) > +#define PMU_FALCON_REG_CCR (25) > +#define PMU_FALCON_REG_SEC (26) > +#define PMU_FALCON_REG_CTX (27) > +#define PMU_FALCON_REG_EXCI (28) > +#define PMU_FALCON_REG_RSVD0 (29) > +#define PMU_FALCON_REG_RSVD1 (30) > +#define PMU_FALCON_REG_RSVD2 (31) > +#define PMU_FALCON_REG_SIZE (32) > + > +/* Choices for pmu_state */ > +#define PMU_STATE_OFF 0 /* PMU is off */ > +#define PMU_STATE_STARTING 1 /* PMU is on, but not booted */ > +#define PMU_STATE_INIT_RECEIVED 2 /* PMU init message received */ > +#define PMU_STATE_ELPG_BOOTING 3 /* PMU is booting */ > +#define PMU_STATE_ELPG_BOOTED 4 /* ELPG is initialized */ > +#define PMU_STATE_LOADING_PG_BUF 5 /* Loading PG buf */ > +#define PMU_STATE_LOADING_ZBC 6 /* Loading ZBC buf */ > +#define PMU_STATE_STARTED 7 /* Fully unitialized */ > + > +#define PMU_QUEUE_COUNT 5 > + > +#define PMU_MAX_NUM_SEQUENCES (256) > +#define PMU_SEQ_BIT_SHIFT (5) > +#define PMU_SEQ_TBL_SIZE \ > + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) > + > +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 > +#define 
PMU_SHA1_GID_SIGNATURE_SIZE 4 > + > +#define PMU_SHA1_GID_SIZE 16 > + > +struct pmu_queue { > + > + /* used by hw, for BIOS/SMI queue */ > + u32 mutex_id; > + u32 mutex_lock; > + /* used by sw, for LPQ/HPQ queue */ > + struct mutex mutex; > + > + /* current write position */ > + u32 position; > + /* physical dmem offset where this queue begins */ > + u32 offset; > + /* logical queue identifier */ > + u32 id; > + /* physical queue index */ > + u32 index; > + /* in bytes */ > + u32 size; > + > + /* open-flag */ > + u32 oflag; > + bool opened; /* opened implies locked */ > +}; > + > +struct pmu_sha1_gid { > + bool valid; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_sha1_gid_data { > + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_desc { > + > + struct pmu_ucode_desc *desc; > + struct pmu_buf_desc ucode; > + > + struct pmu_buf_desc pg_buf; > + /* TBD: remove this if ZBC seq is fixed */ > + struct pmu_buf_desc seq_buf; > + struct pmu_buf_desc trace_buf; > + bool buf_loaded; > + > + struct pmu_sha1_gid gid_info; > + > + struct pmu_queue queue[PMU_QUEUE_COUNT]; > + > + struct pmu_sequence *seq; > + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE]; > + u32 next_seq_desc; > + > + struct pmu_mutex *mutex; > + u32 mutex_cnt; > + > + struct mutex pmu_copy_lock; > + struct mutex pmu_seq_lock; > + > + struct nvkm_pmu_allocator dmem; > + > + u32 *ucode_image; > + bool pmu_ready; > + > + u32 zbc_save_done; > + > + u32 stat_dmem_offset; > + > + u32 elpg_stat; > + > + int pmu_state; > + > +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ > + struct work_struct isr_workq; > + struct mutex elpg_mutex; /* protect elpg enable/disable */ > +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ > + int elpg_refcnt; > + > + bool initialized; > + > + void (*remove_support)(struct pmu_desc *pmu); > + bool sw_ready; > + bool perfmon_ready; > + > + u32 sample_buffer; > + u32 load_shadow; > + u32 load_avg; > + > + struct 
mutex isr_mutex; > + bool isr_enabled; > + > + bool zbc_ready; > + unsigned long perfmon_events_cnt; > + bool perfmon_sampling_enabled; > + u8 pmu_mode; > + u32 falcon_id; > + u32 aelpg_param[5]; > + void *pmu_chip_data; > + struct nvkm_pmu *pmu; > +}; > > struct nvkm_pmu_impl { > struct nvkm_oclass base; > @@ -39,5 +296,12 @@ struct nvkm_pmu_impl { > } data; > > void (*pgob)(struct nvkm_pmu *, bool); > + struct pmu_desc pmudata; > }; > + > +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu) > +{ > + return pmu->pmu; > +} > + > #endif > -- > 1.9.1 > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Due to the length of the patch there are many things to fix. This review alone won't cover all of them, but is mainly an attempt to reduce the amount of code and to split this. On Wed, Mar 11, 2015 at 3:33 PM, Deepak Goyal <dgoyal at nvidia.com> wrote: > It adds PMU boot support.It loads PMU > firmware into PMU falcon.RM/Kernel driver > receives INIT ack (through interrupt mechanism) > from PMU when PMU boots with success. This commit log is strangely formatted. You want to break lines of git commit logs around column 70, not 50. Also don't forget the space after the end of your sentences. The log itself also lacks informative value, especially considering the length of this patch. Please assume your reader is completely unfamiliar with your work, and explain in detail what your patch does, even the parts that seem obvious. Some questions that come to mind when reading the log: - What does the PMU firmware do? - What is RM? (this is not the terminology used by Nouveau, so better to avoid using it altogether) - What value does this patch add to the project? I understand that this patch clears the way for follow-up patches that will actually add features. Please state this clearly in the log, and explain what these features are. No code can be merged upstream until its benefits are clearly understood. Review follows, I have changed the order of files to comment on the structures before the code. 
> diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h > index 998410563bfd..c4686e418582 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/priv.h > +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h > @@ -2,7 +2,91 @@ > #define __NVKM_PMU_PRIV_H__ > #include <subdev/pmu.h> > #include <subdev/pmu/fuc/os.h> > +#include <core/object.h> > +#include <core/device.h> > +#include <core/parent.h> > +#include <core/mm.h> > +#include <linux/rwsem.h> > +#include <linux/slab.h> > +#include <subdev/mmu.h> > +#include <core/gpuobj.h> > > +static inline u32 u64_hi32(u64 n) > +{ > + return (u32)((n >> 32) & ~(u32)0); > +} > + > +static inline u32 u64_lo32(u64 n) > +{ > + return (u32)(n & ~(u32)0); > +} Use the lower_32_bits() and upper_32_bits() macros instead. > + > +/* #define ALLOCATOR_DEBUG */ This line is useless... > + > +/* main struct */ ... and this comment uninformative. > +struct nvkm_pmu_allocator { > + > + char name[32]; /* name for allocator */ > +/*struct rb_root rb_root;*/ /* rb tree root for blocks */ Do not comment out members that we don't need. If it's unneeded, just remove it. 
> + > + u32 base; /* min value of this linear space */ > + u32 limit; /* max value = limit - 1 */ > + > + unsigned long *bitmap; /* bitmap */ > + > + struct gk20a_alloc_block *block_first; /* first block in list */ > + struct gk20a_alloc_block *block_recent; /* last visited block */ > + > + u32 first_free_addr; /* first free addr, non-contigous > + allocation preferred start, > + in order to pick up small holes */ > + u32 last_free_addr; /* last free addr, contiguous > + allocation preferred start */ > + u32 cached_hole_size; /* max free hole size up to > + last_free_addr */ > + u32 block_count; /* number of blocks */ > + > + struct rw_semaphore rw_sema; /* lock */ > + struct kmem_cache *block_cache; /* slab cache */ > + > + /* if enabled, constrain to [base, limit) */ > + struct { > + bool enable; > + u32 base; > + u32 limit; > + } constraint; > + > + int (*alloc)(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + int (*free)(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); > + > +}; > + > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 base, u32 size); > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator); > + > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align); > + > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align); So from the nvkm_pmu_allocator struct and these function prototypes, this looks like a pretty casual address space allocator. Nouveau already has such an allocator: nvkm_mm. Check it out, it will do all that you need and you can remove a lot of code from this patch. > + > +#if defined(ALLOCATOR_DEBUG) > + > +#define allocator_dbg(alloctor, format, arg...) 
\ > +do { \ > + if (1) \ > + pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\ > + alloctor->name, __func__, ##arg);\ > +} while (0) > + > +#else /* ALLOCATOR_DEBUG */ > + > +#define allocator_dbg(format, arg...) I'd prefer if you use the nv_debug() macro in place of this one, but it will go away with the allocator anyway... > + > +#endif /* ALLOCATOR_DEBUG */ > #define nvkm_pmu_create(p, e, o, d) \ > nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d) > #define nvkm_pmu_destroy(p) \ > @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *, > int _nvkm_pmu_init(struct nvkm_object *); > int _nvkm_pmu_fini(struct nvkm_object *, bool); > void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable); > +#define PMU_PG_IDLE_THRESHOLD 15000 > +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000 I do not see these macros being used anywhere in your code. > + > +/* state transition : > + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF > + ON => OFF is always synchronized */ > +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */ > +#define PMU_ELPG_STAT_ON 1 /* elpg is on */ > +/* elpg is off, ALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_ON_PENDING 2 > +/* elpg is on, DISALLOW cmd has been sent, wait for ack */ > +#define PMU_ELPG_STAT_OFF_PENDING 3 > +/* elpg is off, caller has requested on, but ALLOW > +cmd hasn't been sent due to ENABLE_ALLOW delay */ > +#define PMU_ELPG_STAT_OFF_ON_PENDING 4 Same here. If they are used by a future patch, introduce them at the time they actually become useful. 
> + > +/* Falcon Register index */ > +#define PMU_FALCON_REG_R0 (0) > +#define PMU_FALCON_REG_R1 (1) > +#define PMU_FALCON_REG_R2 (2) > +#define PMU_FALCON_REG_R3 (3) > +#define PMU_FALCON_REG_R4 (4) > +#define PMU_FALCON_REG_R5 (5) > +#define PMU_FALCON_REG_R6 (6) > +#define PMU_FALCON_REG_R7 (7) > +#define PMU_FALCON_REG_R8 (8) > +#define PMU_FALCON_REG_R9 (9) > +#define PMU_FALCON_REG_R10 (10) > +#define PMU_FALCON_REG_R11 (11) > +#define PMU_FALCON_REG_R12 (12) > +#define PMU_FALCON_REG_R13 (13) > +#define PMU_FALCON_REG_R14 (14) > +#define PMU_FALCON_REG_R15 (15) > +#define PMU_FALCON_REG_IV0 (16) > +#define PMU_FALCON_REG_IV1 (17) > +#define PMU_FALCON_REG_UNDEFINED (18) > +#define PMU_FALCON_REG_EV (19) > +#define PMU_FALCON_REG_SP (20) > +#define PMU_FALCON_REG_PC (21) > +#define PMU_FALCON_REG_IMB (22) > +#define PMU_FALCON_REG_DMB (23) > +#define PMU_FALCON_REG_CSW (24) > +#define PMU_FALCON_REG_CCR (25) > +#define PMU_FALCON_REG_SEC (26) > +#define PMU_FALCON_REG_CTX (27) > +#define PMU_FALCON_REG_EXCI (28) > +#define PMU_FALCON_REG_RSVD0 (29) > +#define PMU_FALCON_REG_RSVD1 (30) > +#define PMU_FALCON_REG_RSVD2 (31) > +#define PMU_FALCON_REG_SIZE (32) These ones are ok since it would not make sense to define only part of the regs... > + > +/* Choices for pmu_state */ > +#define PMU_STATE_OFF 0 /* PMU is off */ > +#define PMU_STATE_STARTING 1 /* PMU is on, but not booted */ > +#define PMU_STATE_INIT_RECEIVED 2 /* PMU init message received */ > +#define PMU_STATE_ELPG_BOOTING 3 /* PMU is booting */ > +#define PMU_STATE_ELPG_BOOTED 4 /* ELPG is initialized */ > +#define PMU_STATE_LOADING_PG_BUF 5 /* Loading PG buf */ > +#define PMU_STATE_LOADING_ZBC 6 /* Loading ZBC buf */ > +#define PMU_STATE_STARTED 7 /* Fully unitialized */ But here again, the last 5 states are not used yet, so please introduce them as they become needed. 
> + > +#define PMU_QUEUE_COUNT 5 > + > +#define PMU_MAX_NUM_SEQUENCES (256) > +#define PMU_SEQ_BIT_SHIFT (5) > +#define PMU_SEQ_TBL_SIZE \ > + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT) > + > +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2 > +#define PMU_SHA1_GID_SIGNATURE_SIZE 4 > + > +#define PMU_SHA1_GID_SIZE 16 > + > +struct pmu_queue { > + Empty blank line. > + /* used by hw, for BIOS/SMI queue */ > + u32 mutex_id; > + u32 mutex_lock; > + /* used by sw, for LPQ/HPQ queue */ > + struct mutex mutex; > + > + /* current write position */ > + u32 position; > + /* physical dmem offset where this queue begins */ > + u32 offset; > + /* logical queue identifier */ > + u32 id; > + /* physical queue index */ > + u32 index; > + /* in bytes */ > + u32 size; > + > + /* open-flag */ > + u32 oflag; > + bool opened; /* opened implies locked */ > +}; > + > +struct pmu_sha1_gid { > + bool valid; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_sha1_gid_data { > + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE]; > + u8 gid[PMU_SHA1_GID_SIZE]; > +}; > + > +struct pmu_desc { > + Empty blank line. > + struct pmu_ucode_desc *desc; > + struct pmu_buf_desc ucode; > + > + struct pmu_buf_desc pg_buf; This member doesn't seem to be needed now. > + /* TBD: remove this if ZBC seq is fixed */ > + struct pmu_buf_desc seq_buf; > + struct pmu_buf_desc trace_buf; > + bool buf_loaded; buf_loaded is never referenced in this code. > + > + struct pmu_sha1_gid gid_info; > + > + struct pmu_queue queue[PMU_QUEUE_COUNT]; > + > + struct pmu_sequence *seq; Wrong. pmu_sequence is defined in gk20a.h. This file is a generic one. Why would PMUs for other GPUs embed GK20A-specific structures? Actually it seems like the whole pmu_desc should be moved to GK20A-specific files, since it is not used elsewhere for now. 
> + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE]; > + u32 next_seq_desc; > + > + struct pmu_mutex *mutex; > + u32 mutex_cnt; > + > + struct mutex pmu_copy_lock; > + struct mutex pmu_seq_lock; > + > + struct nvkm_pmu_allocator dmem; So as explained above, this should be replaced by a nvkm_mm. > + > + u32 *ucode_image; > + bool pmu_ready; > + > + u32 zbc_save_done; Yet another unreferenced member... > + > + u32 stat_dmem_offset; And another one. > + > + u32 elpg_stat; And another one. > + > + int pmu_state; > + > +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ And another one. > + struct work_struct isr_workq; > + struct mutex elpg_mutex; /* protect elpg enable/disable */ And another one. > +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */ > + int elpg_refcnt; Here too. > + > + bool initialized; > + > + void (*remove_support)(struct pmu_desc *pmu); So this function pointer is set, but never called! Is it unneeded, or have you forgot to call it when you should have? > + bool sw_ready; > + bool perfmon_ready; Unneeded member again. > + > + u32 sample_buffer; > + u32 load_shadow; > + u32 load_avg; > + > + struct mutex isr_mutex; > + bool isr_enabled; > + > + bool zbc_ready; This is only set to false in the destroy() function, so I guess you don't need this now... > + unsigned long perfmon_events_cnt; > + bool perfmon_sampling_enabled; > + u8 pmu_mode; > + u32 falcon_id; > + u32 aelpg_param[5]; And all these 5 members are also not needed now it seems. > + void *pmu_chip_data; From how you are using this member (to store a pointer to a kzalloc'd pmu_gk20a_data), it seems to be unneeded. Put the content of pmu_gk20a_data into gk20a_pmu_priv, and get rid of both this member and pmu_gk20a_data. And actually since both members of pmu_gk20a_data are completely unreferenced, they can be added in a later patch anyway, when they actually become useful. 
> + struct nvkm_pmu *pmu; > +}; > > struct nvkm_pmu_impl { > struct nvkm_oclass base; > @@ -39,5 +296,12 @@ struct nvkm_pmu_impl { > } data; > > void (*pgob)(struct nvkm_pmu *, bool); > + struct pmu_desc pmudata; > }; > + > +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu) > +{ > + return pmu->pmu; > +} > + > #endif > diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h > index 7b86acc634a0..659b4e0ba02b 100644 > --- a/drm/nouveau/include/nvkm/subdev/pmu.h > +++ b/drm/nouveau/include/nvkm/subdev/pmu.h > @@ -1,7 +1,20 @@ > #ifndef __NVKM_PMU_H__ > #define __NVKM_PMU_H__ > #include <core/subdev.h> > +#include <core/device.h> > +#include <subdev/mmu.h> > +#include <linux/debugfs.h> > > +struct pmu_buf_desc { > + struct nvkm_gpuobj *pmubufobj; > + struct nvkm_vma pmubufvma; Your struct is already called "pmu_buf", so maybe call these members "obj" and "vma" simply. > + size_t size; > +}; > +struct pmu_priv_vm { > + struct nvkm_gpuobj *mem; > + struct nvkm_gpuobj *pgd; > + struct nvkm_vm *vm; > +}; > struct nvkm_pmu { > struct nvkm_subdev base; > > @@ -20,9 +33,20 @@ struct nvkm_pmu { > u32 message; > u32 data[2]; > } recv; > - > + wait_queue_head_t init_wq; This wq is initialized and never used. > + bool gr_initialised; Member only written once. > + struct dentry *debugfs; > + struct pmu_buf_desc *pg_buf; This member is never used, and by transition neither is the pg_buf of struct pmu_desc. > + struct pmu_priv_vm *pmuvm; > int (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32); > void (*pgob)(struct nvkm_pmu *, bool); > + int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token); Never used because you are calling the function you affect to the pointer directly in the code (which happens to also be called pmu_mutex_acquire!) > + int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token); Same here. 
> + int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load); > + int (*pmu_load_update)(struct nvkm_pmu *pmu); > + void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu); > + void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles, > + u32 *total_cycles); These four ones are never called. Introduce members and functions only when they become needed. > }; > > static inline struct nvkm_pmu * > diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c > index 054b2d2eec35..6afd389b9764 100644 > --- a/drm/nouveau/nvkm/subdev/pmu/base.c > +++ b/drm/nouveau/nvkm/subdev/pmu/base.c > @@ -25,6 +25,114 @@ > > #include <subdev/timer.h> > > +/* init allocator struct */ > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator, > + const char *name, u32 start, u32 len) > +{ > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > + > + strncpy(allocator->name, name, 32); > + > + allocator->base = start; > + allocator->limit = start + len - 1; > + > + allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long), > + GFP_KERNEL); > + if (!allocator->bitmap) > + return -ENOMEM; > + > + allocator_dbg(allocator, "%s : base %d, limit %d", > + allocator->name, allocator->base); > + > + init_rwsem(&allocator->rw_sema); > + > + allocator->alloc = nvkm_pmu_allocator_block_alloc; > + allocator->free = nvkm_pmu_allocator_block_free; > + > + return 0; > +} > + > +/* destroy allocator, free all remaining blocks if any */ > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator) > +{ > + down_write(&allocator->rw_sema); > + > + kfree(allocator->bitmap); > + > + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator)); > +} > + > +/* > + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is > + * returned to caller in *addr. > + * > + * contiguous allocation, which allocates one block of > + * contiguous address. 
> +*/ > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator, > + u32 *addr, u32 len, u32 align) > +{ > + unsigned long _addr; > + > + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len); > + > + if ((*addr != 0 && *addr < allocator->base) || /* check addr range */ > + *addr + len > allocator->limit || /* check addr range */ > + *addr & (align - 1) || /* check addr alignment */ > + len == 0) /* check len */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -ENOMEM; > + > + down_write(&allocator->rw_sema); > + > + _addr = bitmap_find_next_zero_area(allocator->bitmap, > + allocator->limit - allocator->base + 1, > + *addr ? (*addr - allocator->base) : 0, > + len, > + align - 1); > + if ((_addr > allocator->limit - allocator->base + 1) || > + (*addr && *addr != (_addr + allocator->base))) { > + up_write(&allocator->rw_sema); > + return -ENOMEM; > + } > + > + bitmap_set(allocator->bitmap, _addr, len); > + *addr = allocator->base + _addr; > + > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len); > + > + return 0; > +} > + > +/* free all blocks between start and end */ > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator, > + u32 addr, u32 len, u32 align) > +{ > + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len); > + > + if (addr + len > allocator->limit || /* check addr range */ > + addr < allocator->base || > + addr & (align - 1)) /* check addr alignment */ > + return -EINVAL; > + > + len = ALIGN(len, align); > + if (!len) > + return -EINVAL; > + > + down_write(&allocator->rw_sema); > + bitmap_clear(allocator->bitmap, addr - allocator->base, len); > + up_write(&allocator->rw_sema); > + > + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len); > + > + return 0; > +} > + So all this code should go away when you switch to nvkm_mm. 
It was out-of-place anyway: this is a standard address space allocator and has nothing specific to the PMU. That's a lot of things to fix already, so I will hold my review of pmu/gk20a.c for next time. Just a few remarks about the most obvious problems though: The file is a mess. Functions appear without any logical order, so you end up making forward declarations that could be avoided if things were ordered a bit better. For instance, pmu_read_message() is only used by pmu_process_message(), but you have 3 functions between these two. A logical ordering of the code makes it much easier to read: "building blocks" functions first, more complex functions later. Ideally you would end up with a C file that has no forward-declarations. Again, some functions are absolutely not used, sometimes in worrying ways. Examples are gk20a_pmu_destroy and gk20a_pmu_create_, but I suspect there are others. For gk20a_pmu_create_, I don't even know why it is here in the first place. It seems like its code should be in gk20a_pmu_ctor() instead, and it sets function pointers that are apparently never called, since they remain NULL and things seem to go just fine? This patch should definitely be split into different bits to allow a more pleasant review. Right now it is almost impossible to understand what it does. Suggestion for splitting: 1) Add firmware loading ability, bootstrap PMU (since these two tasks cannot be separated I guess) 2) Add message receiving/posting ability 3) DebugFS support This should be a good beginning to make things more readable. There are other things to comment on, but let's start with this. Keep in mind that upstreaming is more than just trying to make the downstream code fit as-is in the upstream kernel. You need to reshape things when it makes sense, and replace custom-built solutions with the ones that already exist. 
Also important is to make sure you introduce things in a logical way, in chunks as small as possible so the unfamiliar reader can understand them (for this particular series I don't think we can go with less-than-300-line patches though). In other words, it is ok to send a 3000-line patch series, if everything appears progressively and logically. A 3000-line patch, however, is likely to be frowned upon. Looking forward to seeing v2 and hopefully diving deeper into this - good luck! Alex.