Dan Magenheimer
2009-Jun-01 17:15 UTC
[Xen-devel] [PATCH] tmem: shared ephemeral pool (clustering) fixes
tmem: shared ephemeral (SE) pool (clustering) fixes (Keir, if you'd prefer me to send tmem patches individually rather than batch them, please let me know.) Tmem can share clean page cache pages for Linux domains in a virtual cluster (currently only the ocfs2 filesystem has a patch on the Linux side). So when one domain "puts" (evicts) a page, any domain in the cluster can "get" it, thus saving disk reads. This functionality is already present; these are only bug fixes. - fix bugs when an SE pool is destroyed - fixes in parsing tool for xm tmem-list output for SE pools - incorrect locking in one case for destroying an SE pool - clearer verbosity for transfer when an SE pool is destroyed - minor cleanup: merge routines that are mostly duplicate Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com> diff -r 027f19e97e28 tools/misc/xen-tmem-list-parse.c --- a/tools/misc/xen-tmem-list-parse.c Mon Jun 01 15:52:19 2009 +0100 +++ b/tools/misc/xen-tmem-list-parse.c Mon Jun 01 11:07:41 2009 -0600 @@ -26,6 +26,20 @@ unsigned long long parse(char *s,char *m if ( *s1++ != '':'' ) return 0LL; sscanf(s1,"%llu",&ret); + return ret; +} + +unsigned long long parse_hex(char *s,char *match) +{ + char *s1 = strstr(s,match); + unsigned long long ret; + + if ( s1 == NULL ) + return 0LL; + s1 += 2; + if ( *s1++ != '':'' ) + return 0LL; + sscanf(s1,"%llx",&ret); return ret; } @@ -64,7 +78,7 @@ void parse_sharers(char *s, char *match, s1 += 2; if (*s1++ != '':'') return; - while (*s1 <= ''0'' && *s1 <= ''9'') + while (*s1 >= ''0'' && *s1 <= ''9'') *b++ = *s1++; *b++ = '',''; s1 = strstr(s1,match); @@ -196,6 +210,8 @@ void parse_pool(char *s) unsigned long long flush_objs = parse(s,"ot"); parse_string(s,"PT",pool_type,2); + if (pool_type[1] == ''S'') + return; /* no need to repeat print data for shared pools */ printf("domid%lu,id%lu[%s]:pgp=%llu(max=%llu) obj=%llu(%llu) " "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) " "gets=%llu/%llu(%llu%%) " @@ -216,8 +232,8 @@ void 
parse_shared_pool(char *s) char pool_type[3]; char buf[BUFSIZE]; unsigned long pool_id = parse(s,"PI"); - unsigned long long uid0 = parse(s,"U0"); - unsigned long long uid1 = parse(s,"U1"); + unsigned long long uid0 = parse_hex(s,"U0"); + unsigned long long uid1 = parse_hex(s,"U1"); unsigned long long pgp_count = parse(s,"Pc"); unsigned long long max_pgp_count = parse(s,"Pm"); unsigned long long obj_count = parse(s,"Oc"); @@ -238,7 +254,7 @@ void parse_shared_pool(char *s) parse_string(s,"PT",pool_type,2); parse_sharers(s,"SC",buf,BUFSIZE); - printf("poolid=%lu[%s] uuid=%llu.%llu, shared-by:%s: " + printf("poolid=%lu[%s] uuid=%llx.%llx, shared-by:%s: " "pgp=%llu(max=%llu) obj=%llu(%llu) " "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) " "gets=%llu/%llu(%llu%%) " diff -r 027f19e97e28 xen/common/tmem.c --- a/xen/common/tmem.c Mon Jun 01 15:52:19 2009 +0100 +++ b/xen/common/tmem.c Mon Jun 01 11:07:41 2009 -0600 @@ -581,21 +581,6 @@ static NOINLINE void obj_free(obj_t *obj tmem_free(obj,sizeof(obj_t),pool); } -static NOINLINE void obj_rb_destroy_node(struct rb_node *node) -{ - obj_t * obj; - - if ( node == NULL ) - return; - obj_rb_destroy_node(node->rb_left); - obj_rb_destroy_node(node->rb_right); - obj = container_of(node, obj_t, rb_tree_node); - tmem_spin_lock(&obj->obj_spinlock); - ASSERT(obj->no_evict == 0); - radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); - obj_free(obj,1); -} - static NOINLINE int obj_rb_insert(struct rb_root *root, obj_t *obj) { struct rb_node **new, *parent = NULL; @@ -650,26 +635,15 @@ static NOINLINE obj_t * obj_new(pool_t * } /* free an object after destroying any pgps in it */ -static NOINLINE void obj_destroy(obj_t *obj) +static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance) { ASSERT_WRITELOCK(&obj->pool->pool_rwlock); radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); - obj_free(obj,0); + obj_free(obj,no_rebalance); } -/* destroy all objects in a pool */ -static NOINLINE void 
obj_rb_destroy_all(pool_t *pool) -{ - int i; - - tmem_write_lock(&pool->pool_rwlock); - for (i = 0; i < OBJ_HASH_BUCKETS; i++) - obj_rb_destroy_node(pool->obj_rb_root[i].rb_node); - tmem_write_unlock(&pool->pool_rwlock); -} - -/* destroys all objects in a pool that have last_client set to cli_id */ -static void obj_free_selective(pool_t *pool, cli_id_t cli_id) +/* destroys all objs in a pool, or only if obj->last_client matches cli_id */ +static void pool_destroy_objs(pool_t *pool, bool_t selective, cli_id_t cli_id) { struct rb_node *node; obj_t *obj; @@ -684,8 +658,11 @@ static void obj_free_selective(pool_t *p obj = container_of(node, obj_t, rb_tree_node); tmem_spin_lock(&obj->obj_spinlock); node = rb_next(node); - if ( obj->last_client == cli_id ) - obj_destroy(obj); + ASSERT(obj->no_evict == 0); + if ( !selective ) + obj_destroy(obj,1); + else if ( obj->last_client == cli_id ) + obj_destroy(obj,0); else tmem_spin_unlock(&obj->obj_spinlock); } @@ -740,8 +717,9 @@ static int shared_pool_join(pool_t *pool return -1; sl->client = new_client; list_add_tail(&sl->share_list, &pool->share_list); - printk("adding new %s %d to shared pool owned by %s %d\n", - client_str, new_client->cli_id, client_str, pool->client->cli_id); + if ( new_client->cli_id != pool->client->cli_id ) + printk("adding new %s %d to shared pool owned by %s %d\n", + client_str, new_client->cli_id, client_str, pool->client->cli_id); return ++pool->shared_count; } @@ -766,6 +744,10 @@ static NOINLINE void shared_pool_reassig if (new_client->pools[poolid] == pool) break; ASSERT(poolid != MAX_POOLS_PER_DOMAIN); + new_client->eph_count += _atomic_read(pool->pgp_count); + old_client->eph_count -= _atomic_read(pool->pgp_count); + list_splice_init(&old_client->ephemeral_page_list, + &new_client->ephemeral_page_list); printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n", cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid); pool->pool_id = poolid; @@ -781,7 +763,8 @@ static 
NOINLINE int shared_pool_quit(poo ASSERT(is_shared(pool)); ASSERT(pool->client != NULL); - obj_free_selective(pool,cli_id); + ASSERT_WRITELOCK(&tmem_rwlock); + pool_destroy_objs(pool,1,cli_id); list_for_each_entry(sl,&pool->share_list, share_list) { if (sl->client->cli_id != cli_id) @@ -812,15 +795,15 @@ static void pool_flush(pool_t *pool, cli ASSERT(pool != NULL); if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) ) { - printk("tmem: unshared shared pool %d from %s=%d\n", - pool->pool_id, cli_id_str,pool->client->cli_id); + printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n", + cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id); return; } printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing", is_persistent(pool) ? "persistent" : "ephemeral" , is_shared(pool) ? "shared" : "private"); printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id); - obj_rb_destroy_all(pool); + pool_destroy_objs(pool,0,CLI_ID_NULL); if ( destroy ) { pool->client->pools[pool->pool_id] = NULL; @@ -1378,7 +1361,7 @@ static NOINLINE int do_tmem_flush_object if ( obj == NULL ) goto out; tmem_write_lock(&pool->pool_rwlock); - obj_destroy(obj); + obj_destroy(obj,0); pool->flush_objs_found++; tmem_write_unlock(&pool->pool_rwlock); @@ -1455,7 +1438,7 @@ static NOINLINE int do_tmem_new_pool(uin { if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi ) { - printk("(matches shared pool uuid=%"PRIx64".%"PRIu64") ", + printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ", uuid_hi, uuid_lo); printk("pool_id=%d\n",d_poolid); client->pools[d_poolid] = global_shared_pools[s_poolid]; @@ -1507,10 +1490,8 @@ static int tmemc_freeze_pools(int cli_id if ( cli_id == CLI_ID_NULL ) { list_for_each_entry(client,&global_client_list,client_list) - { client->frozen = freeze; - printk("tmem: all pools %s for all %ss\n",s,client_str); - } + printk("tmem: all pools %s for all %ss\n",s,client_str); } else { @@ -1878,7 +1859,7 @@ EXPORT long 
do_tmem_op(tmem_cli_op_t uop } } - if ( op.cmd == TMEM_NEW_POOL ) + if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL ) { if ( !tmem_write_lock_set ) { _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel