Hi,

I am new to clustering and was wondering why pvclust using "maximum" as the distance measure nearly always results in p-values above 95%. I wrote an example programme which demonstrates this effect. I uploaded a PDF showing the results. Here is the code which produces the PDF file:

-------------------------------------------------------------------------------------
s <- matrix(runif(1600,0,1000), nrow=20)
a.res1 <- pvclust(t(s), method.hclust="complete", method.dist="euclidian", nboot=500)
a.res2 <- pvclust(t(s), method.hclust="complete", method.dist="maximum", nboot=500)
a.res3 <- pvclust(t(s), method.hclust="complete", method.dist="canberra", nboot=500)
a.res4 <- pvclust(t(s), method.hclust="ward", method.dist="euclidian", nboot=500)
a.res5 <- pvclust(t(s), method.hclust="ward", method.dist="maximum", nboot=500)
a.res6 <- pvclust(t(s), method.hclust="ward", method.dist="canberra", nboot=500)
a.res7 <- pvclust(t(s), method.hclust="average", method.dist="euclidian", nboot=500)
a.res8 <- pvclust(t(s), method.hclust="average", method.dist="maximum", nboot=500)
a.res9 <- pvclust(t(s), method.hclust="average", method.dist="canberra", nboot=500)
s <- matrix(runif(1600,0,2), nrow=20)
b.res1 <- pvclust(t(s), method.hclust="complete", method.dist="euclidian", nboot=500)
b.res2 <- pvclust(t(s), method.hclust="complete", method.dist="maximum", nboot=500)
b.res3 <- pvclust(t(s), method.hclust="complete", method.dist="canberra", nboot=500)
b.res4 <- pvclust(t(s), method.hclust="ward", method.dist="euclidian", nboot=500)
b.res5 <- pvclust(t(s), method.hclust="ward", method.dist="maximum", nboot=500)
b.res6 <- pvclust(t(s), method.hclust="ward", method.dist="canberra", nboot=500)
b.res7 <- pvclust(t(s), method.hclust="average", method.dist="euclidian", nboot=500)
b.res8 <- pvclust(t(s), method.hclust="average", method.dist="maximum", nboot=500)
b.res9 <- pvclust(t(s), method.hclust="average", method.dist="canberra", nboot=500)
pdf("YOUR PATH/Res.pdf", width=10, height=15)
par(mfrow=c(6,3))
plot(a.res1)
plot(a.res2)
plot(a.res3)
plot(a.res4)
plot(a.res5)
plot(a.res6)
plot(a.res7)
plot(a.res8)
plot(a.res9)
plot(b.res1)
plot(b.res2)
plot(b.res3)
plot(b.res4)
plot(b.res5)
plot(b.res6)
plot(b.res7)
plot(b.res8)
plot(b.res9)
dev.off()
--------------------------------------------------------------

Why is that???

Cheers
syrvn

http://r.789695.n4.nabble.com/file/n2295594/Res.pdf Res.pdf
--
View this message in context: http://r.789695.n4.nabble.com/p-values-pvclust-maximum-distance-measure-tp2295594p2295594.html
Sent from the R help mailing list archive at Nabble.com.
Hi,

I am new to clustering and was wondering why pvclust using "maximum" as the distance measure nearly always results in p-values above 95%. I wrote an example programme which demonstrates this effect. I uploaded a PDF showing the results. Here is the code which produces the PDF file:

-------------------------------------------------------------------------------------
s <- matrix(runif(1600,0,1000), nrow=20)
a.res1 <- pvclust(t(s), method.hclust="complete", method.dist="euclidian", nboot=500)
a.res2 <- pvclust(t(s), method.hclust="complete", method.dist="maximum", nboot=500)
a.res3 <- pvclust(t(s), method.hclust="complete", method.dist="canberra", nboot=500)
a.res4 <- pvclust(t(s), method.hclust="ward", method.dist="euclidian", nboot=500)
a.res5 <- pvclust(t(s), method.hclust="ward", method.dist="maximum", nboot=500)
a.res6 <- pvclust(t(s), method.hclust="ward", method.dist="canberra", nboot=500)
a.res7 <- pvclust(t(s), method.hclust="average", method.dist="euclidian", nboot=500)
a.res8 <- pvclust(t(s), method.hclust="average", method.dist="maximum", nboot=500)
a.res9 <- pvclust(t(s), method.hclust="average", method.dist="canberra", nboot=500)
s <- matrix(runif(1600,0,2), nrow=20)
b.res1 <- pvclust(t(s), method.hclust="complete", method.dist="euclidian", nboot=500)
b.res2 <- pvclust(t(s), method.hclust="complete", method.dist="maximum", nboot=500)
b.res3 <- pvclust(t(s), method.hclust="complete", method.dist="canberra", nboot=500)
b.res4 <- pvclust(t(s), method.hclust="ward", method.dist="euclidian", nboot=500)
b.res5 <- pvclust(t(s), method.hclust="ward", method.dist="maximum", nboot=500)
b.res6 <- pvclust(t(s), method.hclust="ward", method.dist="canberra", nboot=500)
b.res7 <- pvclust(t(s), method.hclust="average", method.dist="euclidian", nboot=500)
b.res8 <- pvclust(t(s), method.hclust="average", method.dist="maximum", nboot=500)
b.res9 <- pvclust(t(s), method.hclust="average", method.dist="canberra", nboot=500)
pdf("YOUR PATH/Res.pdf", width=10, height=15)
par(mfrow=c(6,3))
plot(a.res1)
plot(a.res2)
plot(a.res3)
plot(a.res4)
plot(a.res5)
plot(a.res6)
plot(a.res7)
plot(a.res8)
plot(a.res9)
plot(b.res1)
plot(b.res2)
plot(b.res3)
plot(b.res4)
plot(b.res5)
plot(b.res6)
plot(b.res7)
plot(b.res8)
plot(b.res9)
dev.off()
--------------------------------------------------------------

Why is that???

Cheers
syrvn

http://r.789695.n4.nabble.com/file/n2295593/Res.pdf Res.pdf
--
View this message in context: http://r.789695.n4.nabble.com/p-values-pvclust-maximum-distance-measure-tp2295593p2295593.html
Sent from the R help mailing list archive at Nabble.com.