On 09/15/2014 10:57 AM, eliza botto wrote:
> Dear useRs of R,
> I have two datasets (TT and SS) and I wanted to see whether my data are
> uniformly distributed or not. I tested this with a chi-square test, and the
> results are given after each dataset. Apparently the p-value is of significant
> importance, but I can't interpret the results, nor understand why it says
> "In chisq.test(TT) : Chi-squared approximation may be incorrect".
> ###############################################################
>> dput(TT)
> structure(list(clc5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.26, 0.14, 0,
0.44, 0.26, 0, 0, 0, 0, 0, 0, 0.11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.17, 0.16,
0.56, 0, 1.49, 0, 0.64, 0.79, 0.66, 0, 0, 0.17, 0, 0, 0, 0, 0.56, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0.43, 0.41, 0, 0.5, 0.44, 0, 0, 0, 0, 0.09, 0.46, 0, 0.27,
0.45, 0.15, 0.31, 0.16, 0.44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.07, 0, 0, 0, 0, 0,
0.06, 0, 0.09, 0.07, 0, 0, 7.89, 0, 0.22, 0.29, 0.33, 0.27, 0, 0.36, 0.41, 0, 0,
0, 0, 0.55, 0.81, 0, 0.09, 0.13, 0.28, 0, 0, 0), quota_massima = c(1167L, 1167L,
4572L, 3179L, 3141L, 585L, 585L, 876L, 876L, 1678L, 2667L, 1369L, 1369L, 1369L,
1381L, 1381L, 1381L, 1381L, 2284L, 410L, 2109L, 2507L, 2579L, 2507L, 1436L,
3234L, 3234L, 3234L, 3234L, 2792L, 2569L, 2569L, 2569L, 1669L, 4743L, 4743L,
4743L, 3403L, 3197L, 3267L, 3583L, 3583L, 3583L, 2584L, 2584L, 2579L, 1241L,
1241L, 4174L, 3006L, 3197L, 2366L, 2618L, 2670L, 4487L, 3196L, 3196L, 2107L,
2107L, 2427L, 1814L, 2622L, 1268L, 1268L, 1268L,
3885L, 3885L, 3092L, 3234L, 2625L, 2625L, 3760L, 4743L, 3707L, 3760L,
4743L, 3760L, 3885L, 3760L, 4743L, 2951L, 782L, 2957L, 3343L, 2697L, 2697L,
3915L, 2277L, 1678L, 1678L, 3197L, 2957L, 2957L, 2957L, 4530L, 4530L, 4530L,
2131L, 3618L, 3618L, 3335L, 2512L, 2390L, 1616L, 3526L, 3197L, 3197L, 2625L,
2622L, 3197L, 3197L, 2622L, 2622L, 2622L, 368L, 4572L, 3953L, 863L, 3716L,
3716L, 3716L, 2697L, 2697L, 1358L)), .Names = c("clc5",
"quota_massima"), class = "data.frame", row.names = c(NA,
-124L))
>
>> chisq.test(TT)
> Pearson's Chi-squared test
> data: TT
> X-squared = 411.5517, df = 123, p-value < 2.2e-16
> Warning message:
> In chisq.test(TT) : Chi-squared approximation may be incorrect
> #######################################################################
>> dput(SS)
> structure(list(NDVIanno = c(0.57, 0.536, 0.082, 0.262, 0.209, 0.539, 0.536,
0.543, 0.588, 0.599, 0.397, 0.63, 0.616, 0.644, 0.579, 0.597, 0.617, 0.622,
0.548, 0.528, 0.541, 0.436, 0.509, 0.467, 0.534, 0.412, 0.324, 0.299, 0.41,
0.462, 0.427, 0.456, 0.508, 0.581, 0.242, 0.291, 0.324, 0.28, 0.291, 0.305,
0.365, 0.338, 0.399, 0.516, 0.357, 0.558, 0.605, 0.638, 0.191, 0.377, 0.325,
0.574, 0.458, 0.426, 0.188, 0.412, 0.464, 0.568, 0.582, 0.494, 0.598, 0.451,
0.577, 0.572, 0.602, 0.321, 0.38, 0.413, 0.427, 0.55, 0.437, 0.481, 0.425,
0.234, 0.466, 0.464, 0.491, 0.463, 0.489, 0.435, 0.267, 0.564, 0.256, 0.156,
0.476, 0.498, 0.122, 0.508, 0.582, 0.615, 0.409, 0.356, 0.284, 0.285, 0.444,
0.303, 0.478, 0.557, 0.345, 0.408, 0.347, 0.498, 0.534, 0.576, 0.361, 0.495,
0.502, 0.553, 0.519, 0.504, 0.53, 0.547, 0.559, 0.505, 0.557, 0.377, 0.36,
0.613, 0.452, 0.397, 0.277, 0.42, 0.443, 0.62), delta_z = c(211L, 171L, 925L,
534L, 498L, 50L, 53L, 331L, 135L, 456L, 850L, 288L, 286L, 233L, 342L, 274L,
184L, 198L, 312L, 67L, 476L, 676L, 349L, 873L, 65L, 963L, 553L, 474L,
948L, 1082L, 616L, 704L, 814L, 450L, 865L, 987L, 1265L, 720L, 565L, 652L, 941L,
822L, 1239L, 929L, 477L, 361L, 199L, 203L, 642L, 788L, 818L, 450L, 703L, 760L,
711L, 1015L, 1351L, 195L, 511L, 617L, 296L, 604L, 381L, 389L, 287L, 1043L,
1465L, 963L, 1125L, 582L, 662L, 1424L, 1762L, 575L, 1477L, 1364L, 1236L, 1483L,
1201L, 1644L, 498L, 142L, 510L, 482L, 811L, 788L, 466L, 626L, 461L, 350L, 1177L,
826L, 575L, 568L, 916L, 767L, 1017L, 532L, 1047L, 1370L, 902L, 686L, 703L, 440L,
1016L, 1148L, 1089L, 753L, 650L, 1065L, 568L, 712L, 762L, 636L, 79L, 1092L,
955L, 158L, 1524L, 1145L, 673L, 513L, 596L, 239L)), .Names =
c("NDVIanno", "delta_z"), class = "data.frame",
row.names = c(NA, -124L))
>> chisq.test(SS)
> Pearson's Chi-squared test
> data: SS
> X-squared = 72.8115, df = 123, p-value = 0.9999
> Warning message:
> In chisq.test(SS) : Chi-squared approximation may be incorrect
>
#####################################################################################
> Kindly guide me through like you always did :)
> thanks in advance,
>
>
> Eliza
> [[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
You are applying a Chi-squared test to a 124x2 matrix of values (not all
integers), many of which are zeros. The expected frequencies for many cells
are very small (near zero, less than 1), hence the warning message. More
importantly, does this application of the Chi-squared test make sense?
What am I missing?
Rick
--
Richard A. Bilonick, PhD
Assistant Professor
Dept. of Ophthalmology, School of Medicine
Dept. of Biostatistics, Graduate School of Public Health
Dept. of Orthodontics, School of Dental Medicine
University of Pittsburgh
Principal Investigator for the Pittsburgh Aerosol Research
and Inhalation Epidemiology Study (PARIES)
412 647 5756