Dan Magenheimer
2009-Aug-05 17:45 UTC
[Xen-devel] [PATCH] tmem: save/restore/migrate/livemigrate and shared pool authentication
Attached patch implements save/restore/migration/livemigration for transcendent memory ("tmem"). Without this patch, domains using tmem may in some cases lose data when doing save/restore or migrate/livemigrate. Also included in this patch is support for a new (privileged) hypercall for authorizing domains to share pools; this provides the foundation to accommodate upstream Linux requests for security for shared pools. Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> (Inline and attachment in case my mailer botches it.) ============================diff -r 5333e6497af6 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/libxc/xc_domain_restore.c Wed Aug 05 11:17:18 2009 -0600 @@ -533,6 +533,27 @@ int xc_domain_restore(int xc_handle, int } xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss); + continue; + } + + if ( j == -5 ) + { + DPRINTF("xc_domain_restore start tmem\n"); + if ( xc_tmem_restore(xc_handle, dom, io_fd) ) + { + ERROR("error reading/restoring tmem"); + goto out; + } + continue; + } + + if ( j == -6 ) + { + if ( xc_tmem_restore_extra(xc_handle, dom, io_fd) ) + { + ERROR("error reading/restoring tmem extra"); + goto out; + } continue; } diff -r 5333e6497af6 tools/libxc/xc_domain_save.c --- a/tools/libxc/xc_domain_save.c Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/libxc/xc_domain_save.c Wed Aug 05 11:17:18 2009 -0600 @@ -758,6 +758,7 @@ int xc_domain_save(int xc_handle, int io int live = (flags & XCFLAGS_LIVE); int debug = (flags & XCFLAGS_DEBUG); int race = 0, sent_last_iter, skip_this_iter; + int tmem_saved = 0; /* The new domain''s shared-info frame number. 
*/ unsigned long shared_info_frame; @@ -995,6 +996,13 @@ int xc_domain_save(int xc_handle, int io } print_stats(xc_handle, dom, 0, &stats, 0); + + tmem_saved = xc_tmem_save(xc_handle, dom, io_fd, live, -5); + if ( tmem_saved == -1 ) + { + ERROR("Error when writing to state file (tmem)"); + goto out; + } /* Now write out each data page, canonicalising page tables as we go... */ for ( ; ; ) @@ -1316,6 +1324,13 @@ int xc_domain_save(int xc_handle, int io } DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame); + if ( (tmem_saved > 0) && + (xc_tmem_save_extra(xc_handle,dom,io_fd,-6) == -1) ) + { + ERROR("Error when writing to state file (tmem)"); + goto out; + } + } if ( xc_shadow_control(xc_handle, dom, @@ -1605,6 +1620,9 @@ int xc_domain_save(int xc_handle, int io out: + if ( tmem_saved != 0 && live ) + xc_tmem_save_done(xc_handle, dom); + if ( live ) { if ( xc_shadow_control(xc_handle, dom, diff -r 5333e6497af6 tools/libxc/xc_tmem.c --- a/tools/libxc/xc_tmem.c Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/libxc/xc_tmem.c Wed Aug 05 11:17:18 2009 -0600 @@ -36,6 +36,7 @@ int xc_tmem_control(int xc, uint32_t cli_id, uint32_t arg1, uint32_t arg2, + uint64_t arg3, void *buf) { tmem_op_t op; @@ -45,9 +46,10 @@ int xc_tmem_control(int xc, op.pool_id = pool_id; op.u.ctrl.subop = subop; op.u.ctrl.cli_id = cli_id; + set_xen_guest_handle(op.u.ctrl.buf,buf); op.u.ctrl.arg1 = arg1; op.u.ctrl.arg2 = arg2; - op.u.ctrl.buf.p = buf; + op.u.ctrl.arg3 = arg3; if (subop == TMEMC_LIST) { if ((arg1 != 0) && (lock_pages(buf, arg1) != 0)) @@ -72,6 +74,376 @@ int xc_tmem_control(int xc, return rc; } +static int xc_tmem_uuid_parse(char *uuid_str, uint64_t *uuid_lo, uint64_t *uuid_hi) +{ + char *p = uuid_str; + uint64_t *x = uuid_hi; + int i = 0, digit; + + *uuid_lo = 0; *uuid_hi = 0; + for ( p = uuid_str, i = 0; i != 36 && *p != ''\0''; p++, i++ ) + { + if ( (i == 8 || i == 13 || i == 18 || i == 23) ) + { + if ( *p != ''-'' ) + return -1; + if ( i == 18 ) + x = uuid_lo; + continue; + } + 
else if ( *p >= ''0'' && *p <= ''9'' ) + digit = *p - ''0''; + else if ( *p >= ''A'' && *p <= ''F'' ) + digit = *p - ''A''; + else if ( *p >= ''a'' && *p <= ''f'' ) + digit = *p - ''a''; + else + return -1; + *x = (*x << 4) | digit; + } + if ( (i != 1 && i != 36) || *p != ''\0'' ) + return -1; + return 0; +} + +int xc_tmem_auth(int xc, + int cli_id, + char *uuid_str, + int arg1) +{ + tmem_op_t op; + + op.cmd = TMEM_AUTH; + op.pool_id = 0; + op.u.new.arg1 = cli_id; + op.u.new.flags = arg1; + if ( xc_tmem_uuid_parse(uuid_str, &op.u.new.uuid[0], + &op.u.new.uuid[1]) < 0 ) + { + PERROR("Can''t parse uuid, use xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"); + return -1; + } + + return do_tmem_op(xc, &op); +} + +/* Save/restore/live migrate */ + +/* + Note that live migration complicates the save/restore format in + multiple ways: Though saving/migration can only occur when all + tmem pools belonging to the domain-being-saved are frozen and + this ensures that new pools can''t be created or existing pools + grown (in number of pages), it is possible during a live migration + that pools may be destroyed and pages invalidated while the migration + is in process. As a result, (1) it is not safe to pre-specify counts + for these values precisely, but only as a "max", and (2) a "invalidation" + list (of pools, objects, pages) must be appended when the domain is truly + suspended. 
+ */ + +/* returns 0 if nothing to save, -1 if error saving, 1 if saved successfully */ +int xc_tmem_save(int xc, int dom, int io_fd, int live, int field_marker) +{ + int marker = field_marker; + int i, j; + uint32_t max_pools, version; + uint32_t weight, cap, flags; + uint32_t pool_id; + uint32_t minusone = -1; + struct tmem_handle *h; + + if ( xc_tmem_control(xc,0,TMEMC_SAVE_BEGIN,dom,live,0,0,NULL) <= 0 ) + return 0; + + if ( write_exact(io_fd, &marker, sizeof(marker)) ) + return -1; + version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,0,0,0,0,NULL); + if ( write_exact(io_fd, &version, sizeof(version)) ) + return -1; + max_pools = xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL); + if ( write_exact(io_fd, &max_pools, sizeof(max_pools)) ) + return -1; + if ( version == -1 || max_pools == -1 ) + return -1; + if ( write_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + flags = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_FLAGS,dom,0,0,0,NULL); + if ( write_exact(io_fd, &flags, sizeof(flags)) ) + return -1; + weight = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_WEIGHT,dom,0,0,0,NULL); + if ( write_exact(io_fd, &weight, sizeof(weight)) ) + return -1; + cap = xc_tmem_control(xc,0,TMEMC_SAVE_GET_CLIENT_CAP,dom,0,0,0,NULL); + if ( write_exact(io_fd, &cap, sizeof(cap)) ) + return -1; + if ( flags == -1 || weight == -1 || cap == -1 ) + return -1; + if ( write_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + for ( i = 0; i < max_pools; i++ ) + { + uint64_t uuid[2]; + uint32_t n_pages; + uint32_t pagesize; + char *buf = NULL; + int bufsize = 0; + int checksum = 0; + + /* get pool id, flags, pagesize, n_pages, uuid */ + flags = xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_FLAGS,dom,0,0,0,NULL); + if ( flags != -1 ) + { + pool_id = i; + n_pages = xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_NPAGES,dom,0,0,0,NULL); + if ( !(flags & TMEM_POOL_PERSIST) ) + n_pages = 0; + (void)xc_tmem_control(xc,i,TMEMC_SAVE_GET_POOL_UUID,dom,sizeof(uuid),0,0,&uuid); + if ( 
write_exact(io_fd, &pool_id, sizeof(pool_id)) ) + return -1; + if ( write_exact(io_fd, &flags, sizeof(flags)) ) + return -1; + if ( write_exact(io_fd, &n_pages, sizeof(n_pages)) ) + return -1; + if ( write_exact(io_fd, &uuid, sizeof(uuid)) ) + return -1; + if ( n_pages == 0 ) + continue; + + pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) & + TMEM_POOL_PAGESIZE_MASK) + 12); + if ( pagesize > bufsize ) + { + bufsize = pagesize + sizeof(struct tmem_handle); + if ( (buf = realloc(buf,bufsize)) == NULL ) + return -1; + } + for ( j = n_pages; j > 0; j-- ) + { + int ret; + if ( (ret = xc_tmem_control(xc, pool_id, + TMEMC_SAVE_GET_NEXT_PAGE, dom, + bufsize, 0, 0, buf)) > 0 ) + { + h = (struct tmem_handle *)buf; + if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) ) + return -1; + if ( write_exact(io_fd, &h->index, sizeof(h->index)) ) + return -1; + h++; + checksum += *(char *)h; + if ( write_exact(io_fd, h, pagesize) ) + return -1; + } else if ( ret == 0 ) { + continue; + } else { + /* page list terminator */ + h = (struct tmem_handle *)buf; + h->oid = -1; + if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) ) + return -1; + break; + } + } + DPRINTF("saved %d tmem pages for dom=%d pool=%d, checksum=%x\n", + n_pages-j,dom,pool_id,checksum); + } + } + /* pool list terminator */ + minusone = -1; + if ( write_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + + return 1; +} + +/* only called for live migration */ +int xc_tmem_save_extra(int xc, int dom, int io_fd, int field_marker) +{ + struct tmem_handle handle; + int marker = field_marker; + uint32_t minusone; + int count = 0, checksum = 0; + + if ( write_exact(io_fd, &marker, sizeof(marker)) ) + return -1; + while ( xc_tmem_control(xc, 0, TMEMC_SAVE_GET_NEXT_INV, dom, + sizeof(handle),0,0,&handle) > 0 ) { + if ( write_exact(io_fd, &handle.pool_id, sizeof(handle.pool_id)) ) + return -1; + if ( write_exact(io_fd, &handle.oid, sizeof(handle.oid)) ) + return -1; + if ( write_exact(io_fd, &handle.index, 
sizeof(handle.index)) ) + return -1; + count++; + checksum += handle.pool_id + handle.oid + handle.index; + } + if ( count ) + DPRINTF("needed %d tmem invalidates, check=%d\n",count,checksum); + minusone = -1; + if ( write_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + return 0; +} + +/* only called for live migration */ +void xc_tmem_save_done(int xc, int dom) +{ + xc_tmem_control(xc,0,TMEMC_SAVE_END,dom,0,0,0,NULL); +} + +/* restore routines */ + +static int xc_tmem_restore_new_pool( + int xc, + int cli_id, + uint32_t pool_id, + uint32_t flags, + uint64_t uuid_lo, + uint64_t uuid_hi) +{ + tmem_op_t op; + + op.cmd = TMEM_RESTORE_NEW; + op.pool_id = pool_id; + op.u.new.arg1 = cli_id; + op.u.new.flags = flags; + op.u.new.uuid[0] = uuid_lo; + op.u.new.uuid[1] = uuid_hi; + + return do_tmem_op(xc, &op); +} + +int xc_tmem_restore(int xc, int dom, int io_fd) +{ + uint32_t save_max_pools, save_version; + uint32_t this_max_pools, this_version; + uint32_t pool_id; + uint32_t minusone; + uint32_t weight, cap, flags; + int checksum = 0; + + save_version = xc_tmem_control(xc,0,TMEMC_SAVE_GET_VERSION,dom,0,0,0,NULL); + if ( save_version == -1 ) + return -1; /* domain doesn''t exist */ + save_max_pools = xc_tmem_control(xc,0,TMEMC_SAVE_GET_MAXPOOLS,0,0,0,0,NULL); + if ( read_exact(io_fd, &this_version, sizeof(this_version)) ) + return -1; + if ( read_exact(io_fd, &this_max_pools, sizeof(this_max_pools)) ) + return -1; + /* FIXME check here to ensure no version mismatch or maxpools mismatch */ + if ( read_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + if ( minusone != -1 ) + return -1; + if ( xc_tmem_control(xc,0,TMEMC_RESTORE_BEGIN,dom,0,0,0,NULL) < 0 ) + return -1; + if ( read_exact(io_fd, &flags, sizeof(flags)) ) + return -1; + if ( flags & TMEM_CLIENT_COMPRESS ) + if ( xc_tmem_control(xc,0,TMEMC_SET_COMPRESS,dom,1,0,0,NULL) < 0 ) + return -1; + if ( flags & TMEM_CLIENT_FROZEN ) + if ( xc_tmem_control(xc,0,TMEMC_FREEZE,dom,0,0,0,NULL) < 0 ) + return -1; 
+ if ( read_exact(io_fd, &weight, sizeof(weight)) ) + return -1; + if ( xc_tmem_control(xc,0,TMEMC_SET_WEIGHT,dom,0,0,0,NULL) < 0 ) + return -1; + if ( read_exact(io_fd, &cap, sizeof(cap)) ) + return -1; + if ( xc_tmem_control(xc,0,TMEMC_SET_CAP,dom,0,0,0,NULL) < 0 ) + return -1; + if ( read_exact(io_fd, &minusone, sizeof(minusone)) ) + return -1; + while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 ) + { + uint64_t uuid[2]; + uint32_t n_pages; + char *buf = NULL; + int bufsize = 0, pagesize; + int j; + + if ( read_exact(io_fd, &flags, sizeof(flags)) ) + return -1; + if ( read_exact(io_fd, &n_pages, sizeof(n_pages)) ) + return -1; + if ( read_exact(io_fd, &uuid, sizeof(uuid)) ) + return -1; + if ( xc_tmem_restore_new_pool(xc, dom, pool_id, + flags, uuid[0], uuid[1]) < 0) + return -1; + if ( n_pages <= 0 ) + continue; + + pagesize = 1 << (((flags >> TMEM_POOL_PAGESIZE_SHIFT) & + TMEM_POOL_PAGESIZE_MASK) + 12); + if ( pagesize > bufsize ) + { + bufsize = pagesize; + if ( (buf = realloc(buf,bufsize)) == NULL ) + return -1; + } + for ( j = n_pages; j > 0; j-- ) + { + uint64_t oid; + uint32_t index; + int rc; + if ( read_exact(io_fd, &oid, sizeof(oid)) ) + return -1; + if ( oid == -1 ) + break; + if ( read_exact(io_fd, &index, sizeof(index)) ) + return -1; + if ( read_exact(io_fd, buf, pagesize) ) + return -1; + checksum += *buf; + if ( (rc = xc_tmem_control(xc, pool_id, TMEMC_RESTORE_PUT_PAGE, + dom, bufsize, index, oid, buf)) <= 0 ) + { + DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc); + return -1; + } + } + if ( n_pages ) + DPRINTF("restored %d tmem pages for dom=%d pool=%d, check=%x\n", + n_pages-j,dom,pool_id,checksum); + } + if ( pool_id != -1 ) + return -1; + + return 0; +} + +/* only called for live migration, must be called after suspend */ +int xc_tmem_restore_extra(int xc, int dom, int io_fd) +{ + uint32_t pool_id; + uint64_t oid; + uint32_t index; + int count = 0; + int checksum = 0; + + while ( read_exact(io_fd, 
&pool_id, sizeof(pool_id)) == 0 && pool_id != -1 ) + { + if ( read_exact(io_fd, &oid, sizeof(oid)) ) + return -1; + if ( read_exact(io_fd, &index, sizeof(index)) ) + return -1; + if ( xc_tmem_control(xc, pool_id, TMEMC_RESTORE_FLUSH_PAGE, dom, + 0,index,oid,NULL) <= 0 ) + return -1; + count++; + checksum += pool_id + oid + index; + } + if ( pool_id != -1 ) + return -1; + if ( count ) + DPRINTF("invalidated %d tmem pages, check=%d\n",count,checksum); + + return 0; +} + /* * Local variables: * mode: C diff -r 5333e6497af6 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/libxc/xenctrl.h Wed Aug 05 11:17:18 2009 -0600 @@ -1276,12 +1276,13 @@ int xc_set_cpuidle_max_cstate(int xc_han /** * tmem operations */ -int xc_tmem_control(int xc, - int32_t pool_id, - uint32_t subop, - uint32_t cli_id, - uint32_t arg1, - uint32_t arg2, - void *buf); +int xc_tmem_control(int xc, int32_t pool_id, uint32_t subop, uint32_t cli_id, + uint32_t arg1, uint32_t arg2, uint64_t arg3, void *buf); +int xc_tmem_auth(int xc_handle, int cli_id, char *uuid_str, int arg1); +int xc_tmem_save(int xc_handle, int dom, int live, int fd, int field_marker); +int xc_tmem_save_extra(int xc_handle, int dom, int fd, int field_marker); +void xc_tmem_save_done(int xc_handle, int dom); +int xc_tmem_restore(int xc_handle, int dom, int fd); +int xc_tmem_restore_extra(int xc_handle, int dom, int fd); #endif /* XENCTRL_H */ diff -r 5333e6497af6 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/lowlevel/xc/xc.c Wed Aug 05 11:17:18 2009 -0600 @@ -1523,20 +1523,21 @@ static PyObject *pyxc_tmem_control(XcObj uint32_t cli_id; uint32_t arg1; uint32_t arg2; + uint64_t arg3; char *buf; char _buffer[32768], *buffer = _buffer; int rc; - static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", "buf", NULL }; + static char *kwd_list[] = { "pool_id", "subop", "cli_id", "arg1", "arg2", "arg3", 
"buf", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiis", kwd_list, - &pool_id, &subop, &cli_id, &arg1, &arg2, &buf) ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiiiis", kwd_list, + &pool_id, &subop, &cli_id, &arg1, &arg2, &arg3, &buf) ) return NULL; if ( (subop == TMEMC_LIST) && (arg1 > 32768) ) arg1 = 32768; - if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, arg2, buffer)) < 0 ) + if ( (rc = xc_tmem_control(self->xc_handle, pool_id, subop, cli_id, arg1, arg2, arg3, buffer)) < 0 ) return Py_BuildValue("i", rc); switch (subop) { @@ -1553,6 +1554,28 @@ static PyObject *pyxc_tmem_control(XcObj default: break; } + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_tmem_shared_auth(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t cli_id; + uint32_t arg1; + char *uuid_str; + int rc; + + static char *kwd_list[] = { "cli_id", "uuid_str", "arg1" }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "isi", kwd_list, + &cli_id, &uuid_str, &arg1) ) + return NULL; + + if ( (rc = xc_tmem_auth(self->xc_handle, cli_id, uuid_str, arg1)) < 0 ) + return Py_BuildValue("i", rc); Py_INCREF(zero); return zero; @@ -2029,6 +2052,15 @@ static PyMethodDef pyxc_methods[] = { " buf [str]: Buffer.\n\n" "Returns: [int] 0 or [str] tmem info on success; exception on error.\n" }, + { "tmem_shared_auth", + (PyCFunction)pyxc_tmem_shared_auth, + METH_VARARGS | METH_KEYWORDS, "\n" + "De/authenticate a shared tmem pool.\n" + " cli_id [int]: Client identifier (-1 == all).\n" + " uuid_str [str]: uuid.\n" + " auth [int]: 0|1 .\n" + "Returns: [int] 0 on success; exception on error.\n" }, + { NULL, NULL, 0, NULL } }; diff -r 5333e6497af6 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/xend/XendAPI.py Wed Aug 05 11:17:18 2009 -0600 @@ -933,7 +933,8 @@ class XendAPI(object): (''tmem_list'', None), (''tmem_set_weight'', None), (''tmem_set_cap'', None), - 
(''tmem_set_compress'', None)] + (''tmem_set_compress'', None), + (''tmem_shared_auth'', None)] host_funcs = [(''get_by_name_label'', None), (''list_methods'', None)] @@ -1129,6 +1130,14 @@ class XendAPI(object): node = XendNode.instance() try: node.tmem_set_compress(cli_id, value) + except Exception, e: + return xen_api_error(e) + return xen_api_success_void() + + def host_tmem_shared_auth(self, _, host_ref, cli_id, uuid_str, auth): + node = XendNode.instance() + try: + node.tmem_shared_auth(cli_id, uuid_str, auth) except Exception, e: return xen_api_error(e) return xen_api_success_void() diff -r 5333e6497af6 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/xend/XendNode.py Wed Aug 05 11:17:18 2009 -0600 @@ -949,62 +949,72 @@ class XendNode: subop = TMEMC_LIST arg1 = 32768 arg2 = use_long + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_thaw(self, cli_id): pool_id = -1 subop = TMEMC_THAW arg1 = 0 arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_freeze(self, cli_id): pool_id = -1 subop = TMEMC_FREEZE arg1 = 0 arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_flush(self, cli_id, pages): pool_id = -1 subop = TMEMC_FLUSH arg1 = pages arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_destroy(self, cli_id): pool_id = -1 subop = TMEMC_DESTROY arg1 = 0 arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + 
return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_set_weight(self, cli_id, arg1): pool_id = -1 subop = TMEMC_SET_WEIGHT arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_set_cap(self, cli_id, arg1): pool_id = -1 subop = TMEMC_SET_CAP arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) def tmem_set_compress(self, cli_id, arg1): pool_id = -1 subop = TMEMC_SET_COMPRESS arg2 = 0 + arg3 = 0 buf = '''' - return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, buf) + return self.xc.tmem_control(pool_id, subop, cli_id, arg1, arg2, arg3, buf) + def tmem_shared_auth(self, cli_id, uuid_str, auth): + return self.xc.tmem_auth(cli_id, uuid_str, auth) def instance(): global inst diff -r 5333e6497af6 tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/xend/balloon.py Wed Aug 05 11:17:18 2009 -0600 @@ -111,7 +111,7 @@ def free(need_mem, dominfo): rlimit = RETRY_LIMIT # stop tmem from absorbing any more memory (must THAW when done!) - xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, "") + xc.tmem_control(0,TMEMC_FREEZE,-1, 0, 0, 0, "") # If unreasonable memory size is required, we give up waiting # for ballooning or scrubbing, as if had retried. 
@@ -130,7 +130,7 @@ def free(need_mem, dominfo): if freeable_mem < need_mem and need_mem < max_free_mem: # flush memory from tmem to scrub_mem and reobtain physinfo need_tmem_kb = need_mem - freeable_mem - tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, "") + tmem_kb = xc.tmem_control(0,TMEMC_FLUSH,-1, need_tmem_kb, 0, 0, "") log.debug("Balloon: tmem relinquished %d KiB of %d KiB requested.", tmem_kb, need_tmem_kb) physinfo = xc.physinfo() @@ -232,5 +232,5 @@ def free(need_mem, dominfo): finally: # allow tmem to accept pages again - xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, "") + xc.tmem_control(0,TMEMC_THAW,-1, 0, 0, 0, "") del xc diff -r 5333e6497af6 tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Wed Aug 05 11:17:18 2009 -0600 @@ -202,7 +202,8 @@ class XMLRPCServer: [''info'', ''pciinfo'', ''send_debug_keys'', ''tmem_list'', ''tmem_freeze'', ''tmem_thaw'', ''tmem_flush'', ''tmem_destroy'', ''tmem_set_weight'', - ''tmem_set_cap'', ''tmem_set_compress''], + ''tmem_set_cap'', ''tmem_set_compress'', + ''tmem_shared_auth''], ''node''), (XendDmesg, [''info'', ''clear''], ''node.dmesg'')]: inst = type.instance() diff -r 5333e6497af6 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Mon Jul 20 15:45:50 2009 +0100 +++ b/tools/python/xen/xm/main.py Wed Aug 05 11:17:18 2009 -0600 @@ -207,6 +207,7 @@ SUBCOMMAND_HELP = { ''tmem-set'' : (''[<Domain>|-a|--all] [weight=<weight>] [cap=<cap>] '' ''[compress=<compress>]'', ''Change tmem settings.''), + ''tmem-shared-auth'' : (''[<Domain>|-a|--all] [--uuid=<uuid>] [--auth=<0|1>]'', ''De/authenticate shared tmem pool.''), # security @@ -306,6 +307,11 @@ SUBCOMMAND_OPTIONS = { ), ''tmem-set'': ( (''-a'', ''--all'', ''Operate on all tmem.''), + ), + ''tmem-shared-auth'': ( + (''-a'', ''--all'', ''Authenticate for all tmem pools.''), + (''-u'', ''--uuid'', ''Specify uuid 
(abcdef01-2345-6789-01234567890abcdef).''), + (''-A'', ''--auth'', ''0=auth,1=deauth''), ), } @@ -427,6 +433,7 @@ tmem_commands = [ "tmem-freeze", "tmem-destroy", "tmem-set", + "tmem-shared-auth", ] all_commands = (domain_commands + host_commands + scheduler_commands + @@ -3128,6 +3135,46 @@ def xm_tmem_set(args): if compress is not None: server.xend.node.tmem_set_compress(domid, compress) +def xm_tmem_shared_auth(args): + try: + (options, params) = getopt.gnu_getopt(args, ''au:A:'', [''all'',''uuid='',''auth='']) + except getopt.GetoptError, opterr: + err(opterr) + usage(''tmem-shared-auth'') + + all = False + for (k, v) in options: + if k in [''-a'', ''--all'']: + all = True + + if not all and len(params) == 0: + err(''You must specify -a or --all or a domain id.'') + usage(''tmem-shared-auth'') + + if all: + domid = -1 + else: + try: + domid = int(params[0]) + params = params[1:] + except: + err(''Unrecognized domain id: %s'' % params[0]) + usage(''tmem-shared-auth'') + + for (k, v) in options: + if k in [''-u'', ''--uuid'']: + uuid_str = v + + auth = 0 + for (k, v) in options: + if k in [''-A'', ''--auth'']: + auth = v + + if serverType == SERVER_XEN_API: + return server.xenapi.host.tmem_shared_auth(domid,uuid_str,auth) + else: + return server.xend.node.tmem_shared_auth(domid,uuid_str,auth) + commands = { "shell": xm_shell, @@ -3210,6 +3257,7 @@ commands = { "tmem-destroy": xm_tmem_destroy, "tmem-list": xm_tmem_list, "tmem-set": xm_tmem_set, + "tmem-shared-auth": xm_tmem_shared_auth, } ## The commands supported by a separate argument parser in xend.xm. 
diff -r 5333e6497af6 xen/common/tmem.c --- a/xen/common/tmem.c Mon Jul 20 15:45:50 2009 +0100 +++ b/xen/common/tmem.c Wed Aug 05 11:17:18 2009 -0600 @@ -26,6 +26,8 @@ #define EXPORT /* indicates code other modules are dependent upon */ #define FORWARD + +#define TMEM_SPEC_VERSION 0 /************ INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE ************/ @@ -105,6 +107,7 @@ DECL_CYC_COUNTER(decompress); #define MAX_GLOBAL_SHARED_POOLS 16 struct tm_pool; +struct tmem_page_descriptor; struct client { struct list_head client_list; struct tm_pool *pools[MAX_POOLS_PER_DOMAIN]; @@ -116,11 +119,20 @@ struct client { uint32_t cap; bool_t compress; bool_t frozen; + bool_t shared_auth_required; + /* for save/restore/migration */ + bool_t live_migrating; + bool_t was_frozen; + struct list_head persistent_invalidated_list; + struct tmem_page_descriptor *cur_pgp; + /* statistics collection */ unsigned long compress_poor, compress_nomem; unsigned long compressed_pages; uint64_t compressed_sum_size; uint64_t total_cycles; unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets; + /* shared pool authentication */ + uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2]; }; typedef struct client client_t; @@ -137,6 +149,7 @@ struct tm_pool { struct tm_pool { bool_t shared; bool_t persistent; + int pageshift; /* 0 == 2**12 */ struct list_head pool_list; /* FIXME do we need this anymore? 
*/ client_t *client; uint64_t uuid[2]; /* 0 for private, non-zero for shared */ @@ -144,8 +157,11 @@ struct tm_pool { rwlock_t pool_rwlock; struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* protected by pool_rwlock */ struct list_head share_list; /* valid if shared */ - DECL_SENTINEL int shared_count; /* valid if shared */ + /* for save/restore/migration */ + struct list_head persistent_page_list; + struct tmem_page_descriptor *cur_pgp; + /* statistics collection */ atomic_t pgp_count; int pgp_count_max; long obj_count; /* atomicity depends on pool_rwlock held for write */ @@ -158,6 +174,7 @@ struct tm_pool { unsigned long gets, found_gets; unsigned long flushs, flushs_found; unsigned long flush_objs, flush_objs_found; + DECL_SENTINEL }; typedef struct tm_pool pool_t; @@ -189,16 +206,29 @@ typedef struct tmem_object_node objnode_ typedef struct tmem_object_node objnode_t; struct tmem_page_descriptor { - struct list_head global_eph_pages; - struct list_head client_eph_pages; - obj_t *obj; + union { + struct list_head global_eph_pages; + struct list_head client_inv_pages; + }; + union { + struct list_head client_eph_pages; + struct list_head pool_pers_pages; + }; + union { + obj_t *obj; + uint64_t inv_oid; /* used for invalid list only */ + }; uint32_t index; - size_t size; /* 0 == PAGE_SIZE (pfp), else compressed data (cdata) */ + size_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid, + else compressed data (cdata) */ union { pfp_t *pfp; /* page frame pointer */ char *cdata; /* compressed data */ }; - uint64_t timestamp; + union { + uint64_t timestamp; + uint32_t pool_id; /* used for invalid list only */ + }; DECL_SENTINEL }; typedef struct tmem_page_descriptor pgp_t; @@ -209,6 +239,7 @@ static LIST_HEAD(global_pool_list); static LIST_HEAD(global_pool_list); static pool_t *global_shared_pools[MAX_GLOBAL_SHARED_POOLS] = { 0 }; +static bool_t global_shared_auth = 0; static atomic_t client_weight_total = ATOMIC_INIT(0); static int tmem_initialized = 0; @@ -217,6 
+248,7 @@ EXPORT DEFINE_SPINLOCK(tmem_spinlock); EXPORT DEFINE_SPINLOCK(tmem_spinlock); /* used iff tmh_lock_all */ EXPORT DEFINE_RWLOCK(tmem_rwlock); /* used iff !tmh_lock_all */ static DEFINE_SPINLOCK(eph_lists_spinlock); /* protects global AND clients */ +static DEFINE_SPINLOCK(pers_lists_spinlock); #define tmem_spin_lock(_l) do {if (!tmh_lock_all) spin_lock(_l);}while(0) #define tmem_spin_unlock(_l) do {if (!tmh_lock_all) spin_unlock(_l);}while(0) @@ -366,36 +398,61 @@ static NOINLINE void pgp_free(pgp_t *pgp ASSERT(pgp->obj != NULL); ASSERT_SENTINEL(pgp->obj,OBJ); ASSERT_SENTINEL(pgp->obj->pool,POOL); - ASSERT(list_empty(&pgp->global_eph_pages)); - ASSERT(list_empty(&pgp->client_eph_pages)); + ASSERT(pgp->obj->pool->client != NULL); if ( from_delete ) ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL); ASSERT(pgp->obj->pool != NULL); pool = pgp->obj->pool; + if ( is_ephemeral(pool) ) + { + ASSERT(list_empty(&pgp->global_eph_pages)); + ASSERT(list_empty(&pgp->client_eph_pages)); + } pgp_free_data(pgp, pool); + atomic_dec_and_assert(global_pgp_count); + atomic_dec_and_assert(pool->pgp_count); + pgp->size = -1; + if ( is_persistent(pool) && pool->client->live_migrating ) + { + pgp->inv_oid = pgp->obj->oid; + pgp->pool_id = pool->pool_id; + return; + } INVERT_SENTINEL(pgp,PGD); pgp->obj = NULL; pgp->index = -1; - pgp->size = -1; - atomic_dec_and_assert(global_pgp_count); - atomic_dec_and_assert(pool->pgp_count); + tmem_free(pgp,sizeof(pgp_t),pool); +} + +static NOINLINE void pgp_free_from_inv_list(client_t *client, pgp_t *pgp) +{ + pool_t *pool = client->pools[pgp->pool_id]; + + ASSERT_SENTINEL(pool,POOL); + ASSERT_SENTINEL(pgp,PGD); + INVERT_SENTINEL(pgp,PGD); + pgp->obj = NULL; + pgp->index = -1; tmem_free(pgp,sizeof(pgp_t),pool); } /* remove the page from appropriate lists but not from parent object */ static void pgp_delist(pgp_t *pgp, bool_t no_eph_lock) { + client_t *client; + ASSERT(pgp != NULL); ASSERT(pgp->obj != NULL); ASSERT(pgp->obj->pool != NULL); 
- ASSERT(pgp->obj->pool->client != NULL); + client = pgp->obj->pool->client; + ASSERT(client != NULL); if ( is_ephemeral(pgp->obj->pool) ) { if ( !no_eph_lock ) tmem_spin_lock(&eph_lists_spinlock); if ( !list_empty(&pgp->client_eph_pages) ) - pgp->obj->pool->client->eph_count--; - ASSERT(pgp->obj->pool->client->eph_count >= 0); + client->eph_count--; + ASSERT(client->eph_count >= 0); list_del_init(&pgp->client_eph_pages); if ( !list_empty(&pgp->global_eph_pages) ) global_eph_count--; @@ -403,6 +460,20 @@ static void pgp_delist(pgp_t *pgp, bool_ list_del_init(&pgp->global_eph_pages); if ( !no_eph_lock ) tmem_spin_unlock(&eph_lists_spinlock); + } else { + if ( client->live_migrating ) + { + tmem_spin_lock(&pers_lists_spinlock); + list_add_tail(&pgp->client_inv_pages, + &client->persistent_invalidated_list); + if ( pgp != pgp->obj->pool->cur_pgp ) + list_del_init(&pgp->pool_pers_pages); + tmem_spin_unlock(&pers_lists_spinlock); + } else { + tmem_spin_lock(&pers_lists_spinlock); + list_del_init(&pgp->pool_pers_pages); + tmem_spin_unlock(&pers_lists_spinlock); + } } } @@ -564,6 +635,7 @@ static NOINLINE void obj_free(obj_t *obj ASSERT(obj->pgp_count == 0); pool = obj->pool; ASSERT(pool != NULL); + ASSERT(pool->client != NULL); ASSERT_WRITELOCK(&pool->pool_rwlock); if ( obj->tree_root.rnode != NULL ) /* may be a "stump" with no leaves */ radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); @@ -685,11 +757,14 @@ static pool_t * pool_alloc(void) for (i = 0; i < OBJ_HASH_BUCKETS; i++) pool->obj_rb_root[i] = RB_ROOT; INIT_LIST_HEAD(&pool->pool_list); + INIT_LIST_HEAD(&pool->persistent_page_list); + pool->cur_pgp = NULL; rwlock_init(&pool->pool_rwlock); pool->pgp_count_max = pool->obj_count_max = 0; pool->objnode_count = pool->objnode_count_max = 0; atomic_set(&pool->pgp_count,0); - pool->obj_count = 0; + pool->obj_count = 0; pool->shared_count = 0; + pool->pageshift = PAGE_SHIFT - 12; pool->good_puts = pool->puts = pool->dup_puts_flushed = 0; pool->dup_puts_replaced = 
pool->no_mem_puts = 0; pool->found_gets = pool->gets = 0; @@ -805,6 +880,12 @@ static void pool_flush(pool_t *pool, cli is_persistent(pool) ? "persistent" : "ephemeral" , is_shared(pool) ? "shared" : "private"); printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id); + if ( pool->client->live_migrating ) + { + printk("can''t %s pool while %s is live-migrating\n", + destroy?"destroy":"flush", client_str); + return; + } pool_destroy_objs(pool,0,CLI_ID_NULL); if ( destroy ) { @@ -815,10 +896,10 @@ static void pool_flush(pool_t *pool, cli /************ CLIENT MANIPULATION OPERATIONS **************************/ -static client_t *client_create(void) +static client_t *client_create(cli_id_t cli_id) { client_t *client = tmem_malloc(client_t,NULL); - cli_id_t cli_id = tmh_get_cli_id_from_current(); + int i; printk("tmem: initializing tmem capability for %s=%d...",cli_id_str,cli_id); if ( client == NULL ) @@ -834,15 +915,23 @@ static client_t *client_create(void) tmem_free(client,sizeof(client_t),NULL); return NULL; } - tmh_set_current_client(client); + tmh_set_client_from_id(client,cli_id); client->cli_id = cli_id; #ifdef __i386__ client->compress = 0; #else client->compress = tmh_compression_enabled(); #endif + client->shared_auth_required = tmh_shared_auth(); + for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) + client->shared_auth_uuid[i][0] + client->shared_auth_uuid[i][1] = -1L; + client->frozen = 0; client->live_migrating = 0; + client->weight = 0; client->cap = 0; list_add_tail(&client->client_list, &global_client_list); INIT_LIST_HEAD(&client->ephemeral_page_list); + INIT_LIST_HEAD(&client->persistent_invalidated_list); + client->cur_pgp = NULL; client->eph_count = client->eph_count_max = 0; client->total_cycles = 0; client->succ_pers_puts = 0; client->succ_eph_gets = 0; client->succ_pers_gets = 0; @@ -885,6 +974,11 @@ static bool_t client_over_quota(client_t return 0; return ( ((global_eph_count*100L) / client->eph_count ) > ((total*100L) / 
client->weight) ); +} + +static void client_freeze(client_t *client, int freeze) +{ + client->frozen = freeze; } /************ MEMORY REVOCATION ROUTINES *******************************/ @@ -993,7 +1087,8 @@ static unsigned long tmem_relinquish_npa /************ TMEM CORE OPERATIONS ************************************/ -static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn) +static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn, + void *cva) { void *dst, *p; size_t size; @@ -1011,7 +1106,7 @@ static NOINLINE int do_tmem_put_compress if ( pgp->pfp != NULL ) pgp_free_data(pgp, pgp->obj->pool); /* FIXME... is this right? */ START_CYC_COUNTER(compress); - ret = tmh_compress_from_client(cmfn, &dst, &size); + ret = tmh_compress_from_client(cmfn, &dst, &size, cva); if ( (ret == -EFAULT) || (ret == 0) ) goto out; else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) @@ -1034,7 +1129,7 @@ out: } static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn, - uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len) + uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva) { pool_t *pool; obj_t *obj; @@ -1042,7 +1137,6 @@ static NOINLINE int do_tmem_dup_put(pgp_ pgp_t *pgpfound = NULL; int ret; - /* if we can successfully manipulate pgp to change out the data, do so */ ASSERT(pgp != NULL); ASSERT(pgp->pfp != NULL); ASSERT(pgp->size != -1); @@ -1052,10 +1146,12 @@ static NOINLINE int do_tmem_dup_put(pgp_ pool = obj->pool; ASSERT(pool != NULL); client = pool->client; - if ( len != 0 && tmh_compression_enabled() && - client->compress && pgp->size != 0 ) + if ( client->live_migrating ) + goto failed_dup; /* no dups allowed when migrating */ + /* can we successfully manipulate pgp to change out the data? 
*/ + if ( len != 0 && client->compress && pgp->size != 0 ) { - ret = do_tmem_put_compress(pgp,cmfn); + ret = do_tmem_put_compress(pgp,cmfn,cva); if ( ret == 1 ) goto done; else if ( ret == 0 ) @@ -1072,7 +1168,7 @@ copy_uncompressed: if ( ( pgp->pfp = tmem_page_alloc(pool) ) == NULL ) goto failed_dup; /* tmh_copy_from_client properly handles len==0 and offsets != 0 */ - ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len); + ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0); if ( ret == -EFAULT ) goto bad_copy; pgp->size = 0; @@ -1115,9 +1211,10 @@ failed_dup: } -static NOINLINE int do_tmem_put(pool_t *pool, uint64_t oid, uint32_t index, +static NOINLINE int do_tmem_put(pool_t *pool, + uint64_t oid, uint32_t index, tmem_cli_mfn_t cmfn, uint32_t tmem_offset, - uint32_t pfn_offset, uint32_t len) + uint32_t pfn_offset, uint32_t len, void *cva) { obj_t *obj = NULL, *objfound = NULL, *objnew = NULL; pgp_t *pgp = NULL, *pgpdel = NULL; @@ -1131,7 +1228,7 @@ static NOINLINE int do_tmem_put(pool_t * { ASSERT_SPINLOCK(&objfound->obj_spinlock); if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL) - return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len); + return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva); } /* no puts allowed into a frozen pool (except dup puts) */ @@ -1162,10 +1259,10 @@ static NOINLINE int do_tmem_put(pool_t * ASSERT(ret != -EEXIST); pgp->index = index; - if ( len != 0 && tmh_compression_enabled() && client->compress ) + if ( len != 0 && client->compress ) { ASSERT(pgp->pfp == NULL); - ret = do_tmem_put_compress(pgp,cmfn); + ret = do_tmem_put_compress(pgp,cmfn,cva); if ( ret == 1 ) goto insert_page; if ( ret == -ENOMEM ) @@ -1189,7 +1286,7 @@ copy_uncompressed: goto delete_and_free; } /* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */ - ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len); + ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva); if 
( ret == -EFAULT ) goto bad_copy; pgp->size = 0; @@ -1207,6 +1304,11 @@ insert_page: if (++client->eph_count > client->eph_count_max) client->eph_count_max = client->eph_count; tmem_spin_unlock(&eph_lists_spinlock); + } else { /* is_persistent */ + tmem_spin_lock(&pers_lists_spinlock); + list_add_tail(&pgp->pool_pers_pages, + &pool->persistent_page_list); + tmem_spin_unlock(&pers_lists_spinlock); } ASSERT( ((objnew==obj)||(objfound==obj)) && (objnew!=objfound)); if ( is_shared(pool) ) @@ -1249,7 +1351,7 @@ ASSERT(0); static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index, tmem_cli_mfn_t cmfn, uint32_t tmem_offset, - uint32_t pfn_offset, uint32_t len) + uint32_t pfn_offset, uint32_t len, void *cva) { obj_t *obj; pgp_t *pgp; @@ -1279,12 +1381,13 @@ static NOINLINE int do_tmem_get(pool_t * if ( pgp->size != 0 ) { START_CYC_COUNTER(decompress); - if ( tmh_decompress_to_client(cmfn, pgp->cdata, pgp->size) == -EFAULT ) + if ( tmh_decompress_to_client(cmfn, pgp->cdata, + pgp->size, cva) == -EFAULT ) goto bad_copy; END_CYC_COUNTER(decompress); } else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset, - pfn_offset, len) == -EFAULT) + pfn_offset, len, cva) == -EFAULT) goto bad_copy; if ( is_ephemeral(pool) ) { @@ -1398,10 +1501,12 @@ static NOINLINE int do_tmem_destroy_pool return 1; } -static NOINLINE int do_tmem_new_pool(uint32_t flags, uint64_t uuid_lo, uint64_t uuid_hi) +static NOINLINE int do_tmem_new_pool(cli_id_t this_cli_id, + uint32_t this_pool_id, uint32_t flags, + uint64_t uuid_lo, uint64_t uuid_hi) { - client_t *client = tmh_client_from_current(); - cli_id_t cli_id = tmh_get_cli_id_from_current(); + client_t *client; + cli_id_t cli_id; int persistent = flags & TMEM_POOL_PERSIST; int shared = flags & TMEM_POOL_SHARED; int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT) @@ -1410,12 +1515,22 @@ static NOINLINE int do_tmem_new_pool(uin & TMEM_POOL_VERSION_MASK; pool_t *pool, *shpool; int s_poolid, d_poolid, first_unused_s_poolid; + int i; + if ( 
this_cli_id == CLI_ID_NULL ) + { + client = tmh_client_from_current(); + cli_id = tmh_get_cli_id_from_current(); + } else { + if ( (client = tmh_client_from_cli_id(this_cli_id)) == NULL) + return -EPERM; + cli_id = this_cli_id; + } ASSERT(client != NULL); printk("tmem: allocating %s-%s tmem pool for %s=%d...", persistent ? "persistent" : "ephemeral" , shared ? "shared" : "private", cli_id_str, cli_id); - if ( specversion != 0 ) + if ( specversion != TMEM_SPEC_VERSION ) { printk("failed... unsupported spec version\n"); return -EPERM; @@ -1430,14 +1545,35 @@ static NOINLINE int do_tmem_new_pool(uin printk("failed... out of memory\n"); return -ENOMEM; } - for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ ) + if ( this_cli_id != CLI_ID_NULL ) + { + d_poolid = this_pool_id; + if ( client->pools[d_poolid] != NULL ) + return -EPERM; + d_poolid = this_pool_id; + } + else for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ ) if ( client->pools[d_poolid] == NULL ) break; - if ( d_poolid == MAX_POOLS_PER_DOMAIN ) + if ( d_poolid >= MAX_POOLS_PER_DOMAIN ) { printk("failed... no more pool slots available for this %s\n", client_str); goto fail; + } + if ( shared ) + { + if ( uuid_lo == -1L && uuid_hi == -1L ) + shared = 0; + if ( client->shared_auth_required && !global_shared_auth ) + { + for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) + if ( (client->shared_auth_uuid[i][0] == uuid_lo) && + (client->shared_auth_uuid[i][1] == uuid_hi) ) + break; + if ( i == MAX_GLOBAL_SHARED_POOLS ) + shared = 0; + } } pool->shared = shared; pool->client = client; @@ -1491,7 +1627,7 @@ fail: /************ TMEM CONTROL OPERATIONS ************************************/ /* freeze/thaw all pools belonging to client cli_id (all domains if -1) */ -static int tmemc_freeze_pools(int cli_id, int arg) +static int tmemc_freeze_pools(cli_id_t cli_id, int arg) { client_t *client; bool_t freeze = (arg == TMEMC_FREEZE) ? 
1 : 0; @@ -1502,20 +1638,20 @@ static int tmemc_freeze_pools(int cli_id if ( cli_id == CLI_ID_NULL ) { list_for_each_entry(client,&global_client_list,client_list) - client->frozen = freeze; + client_freeze(client,freeze); printk("tmem: all pools %s for all %ss\n",s,client_str); } else { if ( (client = tmh_client_from_cli_id(cli_id)) == NULL) return -1; - client->frozen = freeze; + client_freeze(client,freeze); printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id); } return 0; } -static int tmemc_flush_mem(int cli_id, uint32_t kb) +static int tmemc_flush_mem(cli_id_t cli_id, uint32_t kb) { uint32_t npages, flushed_pages, flushed_kb; @@ -1699,7 +1835,7 @@ static int tmemc_list_global(tmem_cli_va return sum; } -static int tmemc_list(int cli_id, tmem_cli_va_t buf, uint32_t len, +static int tmemc_list(cli_id_t cli_id, tmem_cli_va_t buf, uint32_t len, bool_t use_long) { client_t *client; @@ -1716,7 +1852,6 @@ static int tmemc_list(int cli_id, tmem_c return -1; else off = tmemc_list_client(client, buf, 0, len, use_long); - return 0; } @@ -1740,6 +1875,9 @@ static int tmemc_set_var_one(client_t *c printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id); break; case TMEMC_SET_COMPRESS: +#ifdef __i386__ + return -1; +#endif client->compress = arg1 ? 1 : 0; printk("tmem: compression %s for %s=%d\n", arg1 ? 
"enabled" : "disabled",cli_id_str,cli_id); @@ -1751,7 +1889,7 @@ static int tmemc_set_var_one(client_t *c return 0; } -static int tmemc_set_var(int cli_id, uint32_t subop, uint32_t arg1) +static int tmemc_set_var(cli_id_t cli_id, uint32_t subop, uint32_t arg1) { client_t *client; @@ -1765,11 +1903,229 @@ static int tmemc_set_var(int cli_id, uin return 0; } -static int do_tmem_control(uint32_t subop, uint32_t cli_id32, - uint32_t arg1, uint32_t arg2, tmem_cli_va_t buf) +static NOINLINE int tmemc_shared_pool_auth(cli_id_t cli_id, uint64_t uuid_lo, + uint64_t uuid_hi, bool_t auth) +{ + client_t *client; + int i, free = -1; + + if ( cli_id == CLI_ID_NULL ) + { + global_shared_auth = auth; + return 1; + } + client = tmh_client_from_cli_id(cli_id); + for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) + { + if ( (client->shared_auth_uuid[i][0] == uuid_lo) && + (client->shared_auth_uuid[i][1] == uuid_hi) ) + { + if ( auth == 0 ) + client->shared_auth_uuid[i][0] + client->shared_auth_uuid[i][1] = -1L; + return 1; + } + if ( (auth == 1) && (client->shared_auth_uuid[i][0] == -1L) && + (client->shared_auth_uuid[i][1] == -1L) && (free == -1) ) + free = i; + } + if ( auth == 0 ) + return 0; + if ( auth == 1 && free == -1 ) + return -ENOMEM; + client->shared_auth_uuid[free][0] = uuid_lo; + client->shared_auth_uuid[free][1] = uuid_hi; + return 1; +} + +static NOINLINE int tmemc_save_subop(int cli_id, uint32_t pool_id, + uint32_t subop, tmem_cli_va_t buf, uint32_t arg1) +{ + client_t *client = tmh_client_from_cli_id(cli_id); + pool_t *pool = (client == NULL) ? 
NULL : client->pools[pool_id]; + uint32_t p; + uint64_t *uuid; + pgp_t *pgp, *pgp2; + + switch(subop) + { + case TMEMC_SAVE_BEGIN: + if ( client == NULL ) + return 0; + for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++) + if ( client->pools[p] != NULL ) + break; + if ( p == MAX_POOLS_PER_DOMAIN ) + return 0; + client->was_frozen = client->frozen; + client->frozen = 1; + if ( arg1 != 0 ) + client->live_migrating = 1; + return 1; + case TMEMC_RESTORE_BEGIN: + ASSERT(client == NULL); + if ( (client = client_create(cli_id)) == NULL ) + return -1; + return 1; + case TMEMC_SAVE_GET_VERSION: + return TMEM_SPEC_VERSION; + case TMEMC_SAVE_GET_MAXPOOLS: + return MAX_POOLS_PER_DOMAIN; + case TMEMC_SAVE_GET_CLIENT_WEIGHT: + return client->weight == -1 ? -2 : client->weight; + case TMEMC_SAVE_GET_CLIENT_CAP: + return client->cap == -1 ? -2 : client->cap; + case TMEMC_SAVE_GET_CLIENT_FLAGS: + return (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) | + (client->was_frozen ? TMEM_CLIENT_FROZEN : 0 ); + case TMEMC_SAVE_GET_POOL_FLAGS: + if ( pool == NULL ) + return -1; + return (pool->persistent ? TMEM_POOL_PERSIST : 0) | + (pool->shared ? TMEM_POOL_SHARED : 0) | + (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT); + case TMEMC_SAVE_GET_POOL_NPAGES: + if ( pool == NULL ) + return -1; + return _atomic_read(pool->pgp_count); + case TMEMC_SAVE_GET_POOL_UUID: + if ( pool == NULL ) + return -1; + uuid = (uint64_t *)buf.p; + *uuid++ = pool->uuid[0]; + *uuid = pool->uuid[1]; + return 0; + case TMEMC_SAVE_END: + client->live_migrating = 0; + if ( !list_empty(&client->persistent_invalidated_list) ) + list_for_each_entry_safe(pgp,pgp2, + &client->persistent_invalidated_list, client_inv_pages) + pgp_free_from_inv_list(client,pgp); + client->frozen = client->was_frozen; + return 0; + } + return -1; +} + +static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id, + tmem_cli_va_t buf, uint32_t bufsize) +{ + client_t *client = tmh_client_from_cli_id(cli_id); + pool_t *pool = (client == NULL) ? 
NULL : client->pools[pool_id]; + pgp_t *pgp; + int ret = 0; + struct tmem_handle *h; + unsigned int pagesize = 1 << (pool->pageshift+12); + + if ( pool == NULL ) + return -1; + if ( is_ephemeral(pool) ) + return -1; + if ( bufsize < pagesize + sizeof(struct tmem_handle) ) + return -ENOMEM; + + tmem_spin_lock(&pers_lists_spinlock); + if ( list_empty(&pool->persistent_page_list) ) + { + ret = -1; + goto out; + } + /* note: pool->cur_pgp is the pgp last returned by get_next_page */ + if ( pool->cur_pgp == NULL ) + { + /* process the first one */ + pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next, + pgp_t,pool_pers_pages); + } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages, + &pool->persistent_page_list) ) + { + /* already processed the last one in the list */ + ret = -1; + goto out; + } + pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next, + pgp_t,pool_pers_pages); + pool->cur_pgp = pgp; + h = (struct tmem_handle *)buf.p; + h->oid = pgp->obj->oid; + h->index = pgp->index; + buf.p = (void *)(h+1); + ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p); + +out: + tmem_spin_unlock(&pers_lists_spinlock); + return ret; +} + +static NOINLINE int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_t buf, + uint32_t bufsize) +{ + client_t *client = tmh_client_from_cli_id(cli_id); + pgp_t *pgp; + struct tmem_handle *h; + int ret = 0; + + if ( client == NULL ) + return 0; + if ( bufsize < sizeof(struct tmem_handle) ) + return 0; + tmem_spin_lock(&pers_lists_spinlock); + if ( list_empty(&client->persistent_invalidated_list) ) + goto out; + if ( client->cur_pgp == NULL ) + { + pgp = list_entry((&client->persistent_invalidated_list)->next, + pgp_t,client_inv_pages); + client->cur_pgp = pgp; + } else if ( list_is_last(&client->cur_pgp->client_inv_pages, + &client->persistent_invalidated_list) ) + { + client->cur_pgp = NULL; + ret = 0; + goto out; + } else { + pgp = list_entry((&client->cur_pgp->client_inv_pages)->next, + 
pgp_t,client_inv_pages); + client->cur_pgp = pgp; + } + h = (struct tmem_handle *)buf.p; + h->pool_id = pgp->pool_id; + h->oid = pgp->inv_oid; + h->index = pgp->index; + ret = 1; +out: + tmem_spin_unlock(&pers_lists_spinlock); + return ret; +} + +static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t oid, + uint32_t index, tmem_cli_va_t buf, uint32_t bufsize) +{ + client_t *client = tmh_client_from_cli_id(cli_id); + pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id]; +int ret = 0; + + if ( pool == NULL ) + return -1; + return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p); +} + +static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t oid, + uint32_t index) +{ + client_t *client = tmh_client_from_cli_id(cli_id); + pool_t *pool = (client == NULL) ? NULL : client->pools[pool_id]; + + if ( pool == NULL ) + return -1; + return do_tmem_flush_page(pool, oid, index); +} + +static NOINLINE int do_tmem_control(struct tmem_op *op) { int ret; - cli_id_t cli_id = (cli_id_t)cli_id32; + uint32_t pool_id = op->pool_id; + uint32_t subop = op->u.ctrl.subop; if (!tmh_current_is_privileged()) { @@ -1781,18 +2137,50 @@ static int do_tmem_control(uint32_t subo case TMEMC_THAW: case TMEMC_FREEZE: case TMEMC_DESTROY: - ret = tmemc_freeze_pools(cli_id,subop); + ret = tmemc_freeze_pools(op->u.ctrl.cli_id,subop); break; case TMEMC_FLUSH: - ret = tmemc_flush_mem(cli_id,arg1); + ret = tmemc_flush_mem(op->u.ctrl.cli_id,op->u.ctrl.arg1); break; case TMEMC_LIST: - ret = tmemc_list(cli_id,buf,arg1,arg2); + ret = tmemc_list(op->u.ctrl.cli_id,op->u.ctrl.buf, + op->u.ctrl.arg1,op->u.ctrl.arg2); break; case TMEMC_SET_WEIGHT: case TMEMC_SET_CAP: case TMEMC_SET_COMPRESS: - ret = tmemc_set_var(cli_id,subop,arg1); + ret = tmemc_set_var(op->u.ctrl.cli_id,subop,op->u.ctrl.arg1); + break; + case TMEMC_SAVE_BEGIN: + case TMEMC_RESTORE_BEGIN: + case TMEMC_SAVE_GET_VERSION: + case TMEMC_SAVE_GET_MAXPOOLS: + case TMEMC_SAVE_GET_CLIENT_WEIGHT: + case 
TMEMC_SAVE_GET_CLIENT_CAP: + case TMEMC_SAVE_GET_CLIENT_FLAGS: + case TMEMC_SAVE_GET_POOL_FLAGS: + case TMEMC_SAVE_GET_POOL_NPAGES: + case TMEMC_SAVE_GET_POOL_UUID: + case TMEMC_SAVE_END: + ret = tmemc_save_subop(op->u.ctrl.cli_id,pool_id,subop, + op->u.ctrl.buf,op->u.ctrl.arg1); + break; + case TMEMC_SAVE_GET_NEXT_PAGE: + ret = tmemc_save_get_next_page(op->u.ctrl.cli_id, pool_id, + op->u.ctrl.buf, op->u.ctrl.arg1); + break; + case TMEMC_SAVE_GET_NEXT_INV: + ret = tmemc_save_get_next_inv(op->u.ctrl.cli_id, op->u.ctrl.buf, + op->u.ctrl.arg1); + break; + case TMEMC_RESTORE_PUT_PAGE: + ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id, + op->u.ctrl.arg3, op->u.ctrl.arg2, + op->u.ctrl.buf, op->u.ctrl.arg1); + break; + case TMEMC_RESTORE_FLUSH_PAGE: + ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id, + op->u.ctrl.arg3, op->u.ctrl.arg2); break; default: ret = -1; @@ -1850,8 +2238,19 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop { tmem_write_lock(&tmem_rwlock); tmem_write_lock_set = 1; - rc = do_tmem_control(op.u.ctrl.subop, op.u.ctrl.cli_id, - op.u.ctrl.arg1, op.u.ctrl.arg2, op.u.ctrl.buf); + rc = do_tmem_control(&op); + goto out; + } else if ( op.cmd == TMEM_AUTH ) { + tmem_write_lock(&tmem_rwlock); + tmem_write_lock_set = 1; + rc = tmemc_shared_pool_auth(op.u.new.arg1,op.u.new.uuid[0], + op.u.new.uuid[1],op.u.new.flags); + goto out; + } else if ( op.cmd == TMEM_RESTORE_NEW ) { + tmem_write_lock(&tmem_rwlock); + tmem_write_lock_set = 1; + rc = do_tmem_new_pool(op.u.new.arg1, op.pool_id, op.u.new.flags, + op.u.new.uuid[0], op.u.new.uuid[1]); goto out; } @@ -1860,7 +2259,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop { tmem_write_lock(&tmem_rwlock); tmem_write_lock_set = 1; - if ( (client = client_create()) == NULL ) + if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL ) { printk("tmem: can''t create tmem structure for %s\n",client_str); rc = -ENOMEM; @@ -1896,22 +2295,22 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop switch ( op.cmd ) { case 
TMEM_NEW_POOL: - rc = do_tmem_new_pool(op.u.new.flags, + rc = do_tmem_new_pool(CLI_ID_NULL, 0, op.u.new.flags, op.u.new.uuid[0], op.u.new.uuid[1]); break; case TMEM_NEW_PAGE: - rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, - 0, 0, 0); + rc = do_tmem_put(pool, op.u.gen.object, + op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL); break; case TMEM_PUT_PAGE: - rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, - 0, 0, PAGE_SIZE); + rc = do_tmem_put(pool, op.u.gen.object, + op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL); if (rc == 1) succ_put = 1; else non_succ_put = 1; break; case TMEM_GET_PAGE: rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, - 0, 0, PAGE_SIZE); + 0, 0, PAGE_SIZE, 0); if (rc == 1) succ_get = 1; else non_succ_get = 1; break; @@ -1930,12 +2329,13 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop case TMEM_READ: rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, op.u.gen.tmem_offset, op.u.gen.pfn_offset, - op.u.gen.len); + op.u.gen.len,0); break; case TMEM_WRITE: - rc = do_tmem_put(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, + rc = do_tmem_put(pool, op.u.gen.object, + op.u.gen.index, op.u.gen.cmfn, op.u.gen.tmem_offset, op.u.gen.pfn_offset, - op.u.gen.len); + op.u.gen.len, NULL); break; case TMEM_XCHG: /* need to hold global lock to ensure xchg is atomic */ diff -r 5333e6497af6 xen/common/tmem_xen.c --- a/xen/common/tmem_xen.c Mon Jul 20 15:45:50 2009 +0100 +++ b/xen/common/tmem_xen.c Wed Aug 05 11:17:18 2009 -0600 @@ -19,6 +19,9 @@ boolean_param("tmem", opt_tmem); EXPORT int opt_tmem_compress = 0; boolean_param("tmem_compress", opt_tmem_compress); + +EXPORT int opt_tmem_shared_auth = 0; +boolean_param("tmem_shared_auth", opt_tmem_shared_auth); EXPORT int opt_tmem_lock = 0; integer_param("tmem_lock", opt_tmem_lock); @@ -98,14 +101,14 @@ static inline void *cli_mfn_to_va(tmem_c EXPORT int tmh_copy_from_client(pfp_t *pfp, tmem_cli_mfn_t cmfn, uint32_t tmem_offset, 
- uint32_t pfn_offset, uint32_t len) + uint32_t pfn_offset, uint32_t len, void *cli_va) { unsigned long tmem_mfn; - void *tmem_va, *cli_va = NULL; + void *tmem_va; ASSERT(pfp != NULL); if ( tmem_offset || pfn_offset || len ) - if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) + if ( (cli_va == NULL) && ((cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) ) return -EFAULT; tmem_mfn = page_to_mfn(pfp); tmem_va = map_domain_page(tmem_mfn); @@ -123,14 +126,13 @@ EXPORT int tmh_copy_from_client(pfp_t *p } EXPORT int tmh_compress_from_client(tmem_cli_mfn_t cmfn, - void **out_va, size_t *out_len) + void **out_va, size_t *out_len, void *cli_va) { - void *cli_va; int ret = 0; unsigned char *dmem = this_cpu(dstmem); unsigned char *wmem = this_cpu(workmem); - if ( (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) + if ( (cli_va == NULL) && (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) return -EFAULT; if ( dmem == NULL || wmem == NULL ) return 0; /* no buffer, so can''t compress */ @@ -143,13 +145,16 @@ EXPORT int tmh_compress_from_client(tmem } EXPORT int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp, - uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len) + uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cli_va) { - unsigned long tmem_mfn, cli_mfn; - void *tmem_va, *cli_va; + unsigned long tmem_mfn, cli_mfn = 0; + int mark_dirty = 1; + void *tmem_va; ASSERT(pfp != NULL); - if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) + if ( cli_va != NULL ) + mark_dirty = 0; + else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) return -EFAULT; tmem_mfn = page_to_mfn(pfp); tmem_va = map_domain_page(tmem_mfn); @@ -158,26 +163,35 @@ EXPORT int tmh_copy_to_client(tmem_cli_m else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) ) memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len); unmap_domain_page(tmem_va); - unmap_domain_page(cli_va); - paging_mark_dirty(current->domain,cli_mfn); + if ( mark_dirty ) + { + unmap_domain_page(cli_va); + 
paging_mark_dirty(current->domain,cli_mfn); + } mb(); return 1; } -EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, size_t size) +EXPORT int tmh_decompress_to_client(tmem_cli_mfn_t cmfn, void *tmem_va, + size_t size, void *cli_va) { - unsigned long cli_mfn; - void *cli_va; + unsigned long cli_mfn = 0; + int mark_dirty = 1; size_t out_len = PAGE_SIZE; int ret; - if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) + if ( cli_va != NULL ) + mark_dirty = 0; + else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) return -EFAULT; ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len); ASSERT(ret == LZO_E_OK); ASSERT(out_len == PAGE_SIZE); - unmap_domain_page(cli_va); - paging_mark_dirty(current->domain,cli_mfn); + if ( mark_dirty ) + { + unmap_domain_page(cli_va); + paging_mark_dirty(current->domain,cli_mfn); + } mb(); return 1; } diff -r 5333e6497af6 xen/include/public/tmem.h --- a/xen/include/public/tmem.h Mon Jul 20 15:45:50 2009 +0100 +++ b/xen/include/public/tmem.h Wed Aug 05 11:17:18 2009 -0600 @@ -42,15 +42,36 @@ #define TMEM_WRITE 9 #define TMEM_XCHG 10 +/* Privileged commands to HYPERVISOR_tmem_op() */ +#define TMEM_AUTH 101 +#define TMEM_RESTORE_NEW 102 + /* Subops for HYPERVISOR_tmem_op(TMEM_CONTROL) */ -#define TMEMC_THAW 0 -#define TMEMC_FREEZE 1 -#define TMEMC_FLUSH 2 -#define TMEMC_DESTROY 3 -#define TMEMC_LIST 4 -#define TMEMC_SET_WEIGHT 5 -#define TMEMC_SET_CAP 6 -#define TMEMC_SET_COMPRESS 7 +#define TMEMC_THAW 0 +#define TMEMC_FREEZE 1 +#define TMEMC_FLUSH 2 +#define TMEMC_DESTROY 3 +#define TMEMC_LIST 4 +#define TMEMC_SET_WEIGHT 5 +#define TMEMC_SET_CAP 6 +#define TMEMC_SET_COMPRESS 7 +#define TMEMC_SHARED_POOL_AUTH 8 +#define TMEMC_SHARED_POOL_DEAUTH 9 +#define TMEMC_SAVE_BEGIN 10 +#define TMEMC_SAVE_GET_VERSION 11 +#define TMEMC_SAVE_GET_MAXPOOLS 12 +#define TMEMC_SAVE_GET_CLIENT_WEIGHT 13 +#define TMEMC_SAVE_GET_CLIENT_CAP 14 +#define TMEMC_SAVE_GET_CLIENT_FLAGS 15 +#define TMEMC_SAVE_GET_POOL_FLAGS 16 +#define 
TMEMC_SAVE_GET_POOL_NPAGES 17 +#define TMEMC_SAVE_GET_POOL_UUID 18 +#define TMEMC_SAVE_GET_NEXT_PAGE 19 +#define TMEMC_SAVE_GET_NEXT_INV 20 +#define TMEMC_SAVE_END 21 +#define TMEMC_RESTORE_BEGIN 30 +#define TMEMC_RESTORE_PUT_PAGE 32 +#define TMEMC_RESTORE_FLUSH_PAGE 33 /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ #define TMEM_POOL_PERSIST 1 @@ -60,6 +81,10 @@ #define TMEM_POOL_VERSION_SHIFT 24 #define TMEM_POOL_VERSION_MASK 0xff +/* Bits for client flags (save/restore) */ +#define TMEM_CLIENT_COMPRESS 1 +#define TMEM_CLIENT_FROZEN 2 + /* Special errno values */ #define EFROZEN 1000 #define EEMPTY 1001 @@ -70,31 +95,40 @@ typedef XEN_GUEST_HANDLE(char) tmem_cli_ typedef XEN_GUEST_HANDLE(char) tmem_cli_va_t; struct tmem_op { uint32_t cmd; - int32_t pool_id; /* private > 0; shared < 0; 0 is invalid */ + int32_t pool_id; union { - struct { /* for cmd == TMEM_NEW_POOL */ + struct { uint64_t uuid[2]; uint32_t flags; - } new; - struct { /* for cmd == TMEM_CONTROL */ + uint32_t arg1; + } new; /* for cmd == TMEM_NEW_POOL, TMEM_AUTH, TMEM_RESTORE_NEW */ + struct { uint32_t subop; uint32_t cli_id; uint32_t arg1; uint32_t arg2; + uint64_t arg3; tmem_cli_va_t buf; - } ctrl; + } ctrl; /* for cmd == TMEM_CONTROL */ struct { + uint64_t object; uint32_t index; uint32_t tmem_offset; uint32_t pfn_offset; uint32_t len; tmem_cli_mfn_t cmfn; /* client machine page frame */ - } gen; + } gen; /* for all other cmd ("generic") */ } u; }; typedef struct tmem_op tmem_op_t; DEFINE_XEN_GUEST_HANDLE(tmem_op_t); + +struct tmem_handle { + uint32_t pool_id; + uint32_t index; + uint64_t oid; +}; #endif diff -r 5333e6497af6 xen/include/xen/tmem_xen.h --- a/xen/include/xen/tmem_xen.h Mon Jul 20 15:45:50 2009 +0100 +++ b/xen/include/xen/tmem_xen.h Wed Aug 05 11:17:18 2009 -0600 @@ -53,6 +53,12 @@ static inline int tmh_compression_enable static inline int tmh_compression_enabled(void) { return opt_tmem_compress; +} + +extern int opt_tmem_shared_auth; +static inline int tmh_shared_auth(void) +{ + 
return opt_tmem_shared_auth; } extern int opt_tmem; @@ -271,9 +277,10 @@ static inline tmh_cli_ptr_t *tmh_get_cli return current->domain; } -static inline void tmh_set_current_client(struct client *client) +static inline void tmh_set_client_from_id(struct client *client,cli_id_t cli_id) { - current->domain->tmem = client; + struct domain *d = get_domain_by_id(cli_id); + d->tmem = client; } static inline bool_t tmh_current_is_privileged(void) @@ -301,9 +308,11 @@ static inline int tmh_get_tmemop_from_cl return rc; switch ( cop.cmd ) { - case TMEM_NEW_POOL: u = XLAT_tmem_op_u_new; break; - case TMEM_CONTROL: u = XLAT_tmem_op_u_ctrl; break; - default: u = XLAT_tmem_op_u_gen; break; + case TMEM_NEW_POOL: u = XLAT_tmem_op_u_new; break; + case TMEM_CONTROL: u = XLAT_tmem_op_u_ctrl; break; + case TMEM_AUTH: u = XLAT_tmem_op_u_new; break; + case TMEM_RESTORE_NEW:u = XLAT_tmem_op_u_new; break; + default: u = XLAT_tmem_op_u_gen ; break; } #define XLAT_tmem_op_HNDL_u_ctrl_buf(_d_, _s_) \ guest_from_compat_handle((_d_)->u.ctrl.buf, (_s_)->u.ctrl.buf) @@ -326,16 +335,16 @@ static inline void tmh_copy_to_client_bu #define tmh_cli_id_str "domid" #define tmh_client_str "domain" -extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t); +extern int tmh_decompress_to_client(tmem_cli_mfn_t,void*,size_t,void*); -extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *); +extern int tmh_compress_from_client(tmem_cli_mfn_t,void**,size_t *,void*); extern int tmh_copy_from_client(pfp_t *pfp, tmem_cli_mfn_t cmfn, uint32_t tmem_offset, - uint32_t pfn_offset, uint32_t len); + uint32_t pfn_offset, uint32_t len, void *cva); extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp, - uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len); + uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva); #define TMEM_PERF _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel