Hi all, This is a new release of blkio-cgroup v10. This release reduces IO tracking overhead and, since lock_page_cgroup() is no longer used, fixes an issue that could cause a deadlock. Thank you KAMEZAWA-san for your suggestions and for pointing out the issue. This patch can be applied to 2.6.31-rc3-mmotm0716 and 2.6.31-rc4. The list of the patches: [PATCH 1/7] blkio-cgroup-v10: Introduction [PATCH 2/7] blkio-cgroup-v10: The new page_cgroup framework [PATCH 3/7] blkio-cgroup-v10: Refactoring io-context initialization [PATCH 4/7] blkio-cgroup-v10: The body of blkio-cgroup [PATCH 5/7] blkio-cgroup-v10: The document of blkio-cgroup [PATCH 6/7] blkio-cgroup-v10: Page tracking hooks [PATCH 7/7] blkio-cgroup-v10: Add a cgroup support to dm-ioband About blkio-cgroup: blkio-cgroup is a block I/O tracking mechanism implemented on the cgroup memory subsystem. Using this feature the owners of any type of I/O can be determined. This allows dm-ioband to control block I/O bandwidth even when it is accepting delayed write requests. dm-ioband can find the cgroup of each request. It is also possible for others working on I/O bandwidth throttling to use this functionality to control asynchronous I/O with a little enhancement. Please visit our website, where the patches and more information are available. Linux Block I/O Bandwidth Control Project http://sourceforge.net/apps/trac/ioband/ I'd like to get some feedback from the list. Any comments are appreciated. Thanks, Ryo Tsuruta
Ryo Tsuruta
2009-Jul-28 12:17 UTC
[PATCH 2/7] blkio-cgroup-v10: The new page_cgroup framework
This patch makes the page_cgroup framework be able to be used even if the compile option of the cgroup memory controller is off. So blkio-cgroup can use this framework without the memory controller. Signed-off-by: Hirokazu Takahashi <taka at valinux.co.jp> Signed-off-by: Ryo Tsuruta <ryov at valinux.co.jp> --- include/linux/memcontrol.h | 6 ++++++ include/linux/mmzone.h | 4 ++-- include/linux/page_cgroup.h | 5 +++-- init/Kconfig | 4 ++++ mm/Makefile | 3 ++- mm/memcontrol.c | 6 ++++++ mm/page_cgroup.c | 3 +-- 7 files changed, 24 insertions(+), 7 deletions(-) Index: linux-2.6.31-rc3-mm1/include/linux/memcontrol.h ==================================================================--- linux-2.6.31-rc3-mm1.orig/include/linux/memcontrol.h +++ linux-2.6.31-rc3-mm1/include/linux/memcontrol.h @@ -37,6 +37,8 @@ struct mm_struct; * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) */ +extern void __init_mem_page_cgroup(struct page_cgroup *pc); + extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ @@ -121,6 +123,10 @@ void mem_cgroup_update_mapped_file_stat( #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; +static inline void __init_mem_page_cgroup(struct page_cgroup *pc) +{ +} + static inline int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { Index: linux-2.6.31-rc3-mm1/include/linux/mmzone.h ==================================================================--- linux-2.6.31-rc3-mm1.orig/include/linux/mmzone.h +++ linux-2.6.31-rc3-mm1/include/linux/mmzone.h @@ -609,7 +609,7 @@ typedef struct pglist_data { int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_CGROUP_PAGE struct page_cgroup *node_page_cgroup; #endif #endif @@ -960,7 +960,7 @@ struct mem_section { /* See declaration of similar field in struct zone */ unsigned long *pageblock_flags; 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_CGROUP_PAGE /* * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use * section. (see memcontrol.h/page_cgroup.h about this.) Index: linux-2.6.31-rc3-mm1/include/linux/page_cgroup.h ==================================================================--- linux-2.6.31-rc3-mm1.orig/include/linux/page_cgroup.h +++ linux-2.6.31-rc3-mm1/include/linux/page_cgroup.h @@ -1,7 +1,7 @@ #ifndef __LINUX_PAGE_CGROUP_H #define __LINUX_PAGE_CGROUP_H -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_CGROUP_PAGE #include <linux/bit_spinlock.h> /* * Page Cgroup can be considered as an extended mem_map. @@ -14,6 +14,7 @@ struct page_cgroup { unsigned long flags; struct mem_cgroup *mem_cgroup; struct page *page; + unsigned long blkio_cgroup_id; struct list_head lru; /* per cgroup LRU list */ }; @@ -96,7 +97,7 @@ static inline void unlock_page_cgroup(st bit_spin_unlock(PCG_LOCK, &pc->flags); } -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_CGROUP_PAGE */ struct page_cgroup; static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) Index: linux-2.6.31-rc3-mm1/init/Kconfig ==================================================================--- linux-2.6.31-rc3-mm1.orig/init/Kconfig +++ linux-2.6.31-rc3-mm1/init/Kconfig @@ -610,6 +610,10 @@ config CGROUP_MEM_RES_CTLR_SWAP endif # CGROUPS +config CGROUP_PAGE + def_bool y + depends on CGROUP_MEM_RES_CTLR + config MM_OWNER bool Index: linux-2.6.31-rc3-mm1/mm/Makefile ==================================================================--- linux-2.6.31-rc3-mm1.orig/mm/Makefile +++ linux-2.6.31-rc3-mm1/mm/Makefile @@ -40,6 +40,7 @@ else obj-$(CONFIG_SMP) += allocpercpu.o endif obj-$(CONFIG_QUICKLIST) += quicklist.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o +obj-$(CONFIG_CGROUP_PAGE) += page_cgroup.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += 
kmemleak-test.o Index: linux-2.6.31-rc3-mm1/mm/memcontrol.c ==================================================================--- linux-2.6.31-rc3-mm1.orig/mm/memcontrol.c +++ linux-2.6.31-rc3-mm1/mm/memcontrol.c @@ -130,6 +130,12 @@ struct mem_cgroup_lru_info { struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES]; }; +void __meminit __init_mem_page_cgroup(struct page_cgroup *pc) +{ + pc->mem_cgroup = NULL; + INIT_LIST_HEAD(&pc->lru); +} + /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide Index: linux-2.6.31-rc3-mm1/mm/page_cgroup.c ==================================================================--- linux-2.6.31-rc3-mm1.orig/mm/page_cgroup.c +++ linux-2.6.31-rc3-mm1/mm/page_cgroup.c @@ -14,9 +14,8 @@ static void __meminit __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) { pc->flags = 0; - pc->mem_cgroup = NULL; pc->page = pfn_to_page(pfn); - INIT_LIST_HEAD(&pc->lru); + __init_mem_page_cgroup(pc); } static unsigned long total_usage;