Last year there was a request seeking functions to plot Venn diagrams in R. Seeing no reply and for other reasons needing this, I wrote a quick routine. The general problem of creating a Venn diagram with overlap areas proportional to the number of counts in each overlap is over determined. An approximate solution is to make the area of the circles and pair wise overlap areas proportional to the number of counts in each. Using just the pair wise overlaps ignores (or hopes for the best on :) ) the area of the three-way overlap. R code that implements this is shown below. Usage: A, B and C are Boolean vectors of equal length, e.g. A = runif(100) < 0.5 B = runif(100) < 0.4 C = runif(100) < 0.6 d = list() d$table = table(A,B,C) d$labels = c("A","B","C") plot.venn.diagram(d) This is pre-alpha code, caveat emptor. In particular, the code to position the labels needs work. If someone wants to convert this to a package or add it to an existing package, feel free to do so. To optimize the solution over all of the overlaps or if you have more than three sets, you can use a random sampling approach, but implementing this in R is too slow to be useful. David David J. States, M.D., Ph.D. Professor of Human Genetics Director of Bioinformatics University of Michigan School of Medicine Medical Science Building I, Room 5443 Ann Arbor, MI 48109 USA venn.overlap <- function(r, a, b, target = 0) { # # calculate the overlap area for circles of radius a and b # with centers separated by r # target is included for the root finding code # pi = acos(-1) if(r >= a + b) { return( - target) } if(r < a - b) { return(pi * b * b - target) } if(r < b - a) { return(pi * a * a - target) } s = (a + b + r)/2 triangle.area = sqrt(s * (s - a) * (s - b) * (s - r)) h = (2 * triangle.area)/r aa = 2 * atan(sqrt(((s - r) * (s - a))/(s * (s - b)))) ab = 2 * atan(sqrt(((s - r) * (s - b))/(s * (s - a)))) sector.area = aa * (a * a) + ab * (b * b) overlap = sector.area - 2 * triangle.area return(overlap - target) } plot.venn.diagram <- function(d) { # # Draw Venn diagrams with proportional overlaps # d$table = 3 way table of overlaps # d$labels = array of character string to use as labels # pi = acos(-1) csz = 0.1 # Normalize the data n = length(dim(d$table)) c1 = vector(length = n) c1[1] = sum(d$table[2, , ]) c1[2] = sum(d$table[, 2, ]) c1[3] = sum(d$table[, , 2]) n1 = c1 # c2 = matrix(nrow = n, ncol = n, 0) c2[1, 2] = sum(d$table[2, 2, ]) c2[2, 1] = c2[1, 2] c2[1, 3] = sum(d$table[2, , 2]) c2[3, 1] = c2[1, 3] c2[2, 3] = sum(d$table[, 2, 2]) c2[3, 2] = c2[2, 3] n2 = c2 # c3 = d$table[2, 2, 2] n3 = c3 c2 = c2/sum(c1) c3 = c3/sum(c1) c1 = c1/sum(c1) n = length(c1) # Radii are set so the area is proporitional to number of counts pi = acos(-1) r = sqrt(c1/pi) r12 = uniroot(venn.overlap, interval = c(max(r[1] - r[2], r[2] - r[1], 0) + 0.01, r[1] + r[2] - 0.01), a = r[1], b = r[ 2], target = c2[1, 2])$root r13 = uniroot(venn.overlap, interval = c(max(r[1] - r[3], r[3] - r[1], 0) + 0.01, r[1] + r[3] - 0.01), a = r[1], b = r[ 3], target = c2[1, 3])$root r23 = uniroot(venn.overlap, interval = c(max(r[2] - r[3], r[3] - r[2], 0) + 0.01, r[2] + r[3] - 0.01), a = r[2], b = r[ 3], target = c2[2, 3])$root s = (r12 + r13 + r23)/2 x = vector() y = vector() x[1] = 0 y[1] = 0 x[2] = r12 y[2] = 0 angle = 2 * atan(sqrt(((s - r12) * (s - r13))/(s * (s - r13)))) x[3] = r13 * cos(angle) y[3] = r13 * sin(angle) xc = cos(seq(from = 0, to = 2 * pi, by = 0.01)) yc = sin(seq(from = 0, to = 2 * pi, by = 0.01)) cmx = sum(x * c1) cmy = sum(y * c1) x = x - cmx y = y - cmy rp=sqrt(x*x + y*y) frame() par(usr = c(-1, 1, -1, 1), pty = "s") box() for(i in 1:3) { lines(xc * r[i] + x[i], yc * r[i] + y[i]) } xl = (rp[1] + (0.7 * r[1])) * x[1]/rp[1] yl = (rp[1] + (0.7 * r[1])) * y[1]/rp[1] text(xl, yl, d$labels[1]) text(xl, yl - csz, d$table[2, 1, 1]) xl = (rp[2] + (0.7 * r[2])) * x[2]/rp[2] yl = (rp[2] + (0.7 * r[2])) * y[2]/rp[2] text(xl, yl, d$labels[2]) text(xl, yl - csz, d$table[1, 2, 1]) xl = (rp[3] + (0.7 * r[3])) * x[3]/rp[3] yl = (rp[3] + (0.7 * r[3])) * y[3]/rp[3] text(xl, yl, d$labels[3]) text(xl, yl - csz, d$table[1, 1, 2]) # text((x[1] + x[2])/2, (y[1] + y[2])/2, d$table[2, 2, 1]) text((x[1] + x[3])/2, (y[1] + y[3])/2, d$table[2, 1, 2]) text((x[2] + x[3])/2, (y[2] + y[3])/2, d$table[1, 2, 2]) # text(0, 0, n3) list(r = r, x = x, y = y, dist = c(r12, r13, r23), count1 = c1, count2 c2, labels = d$labels) }