Dears members, Is the below a bug of the cut {base} function? dat <- c( 0.6, 0.6, 0.6, 0.7, 0.7, 0.7, 0.7, 0.7, #(8) 0.8, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9, #(7) 1.0, 1.0, 1.0, 1.0, 1.1, 1.1, 1.1, #(7) 1.2, 1.2, 1.2, 1.2, 1.3, 1.3, 1.3, #(7) 1.4, 1.4, 1.4, 1.5, 1.5, 1.5, #(6) 1.6, 1.6, 1.7, 1.7, 1.7, 1.7, #(6) 1.8, 1.8, 1.8, 1.9, 1.9, #(5) 2.0, 2.0, 2.0, 2.0, 2.0, 2.1 #(6) ) # making class from function "cut" (f <- cut(dat, breaks= seq(from=.6, to=2.2, by=.2), include.lowest=TRUE, dig.lab=10L, right=FALSE)) # more easy to see the table as.matrix(tb <- table(f)) # Checking print(length(dat[dat >= 0.6 & dat < 0.8])) == tb[1] print(length(dat[dat >= 0.8 & dat < 1.0])) == tb[2] print(length(dat[dat >= 1.0 & dat < 1.2])) == tb[3] # !? print(length(dat[dat >= 1.2 & dat < 1.4])) == tb[4] # !? print(length(dat[dat >= 1.4 & dat < 1.6])) == tb[5] print(length(dat[dat >= 1.6 & dat < 1.8])) == tb[6] # !? print(length(dat[dat >= 1.8 & dat < 2.0])) == tb[7] # !? print(length(dat[dat >= 2.0 & dat < 2.2])) == tb[8] Best, ///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ Jose Claudio Faria UESC/DCET/Brasil joseclaudio.faria at gmail.com Telefones: 55(73)3680.5545 - UESC 55(73)99966.9100 - VIVO 55(73)98817.6159 - OI ///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ If you have software to deal with statistics, you have arms; if you have good software, you have arms and legs; if you have software like R, you have arms, legs and wings... the height of your flight depends only on you! [[alternative HTML version deleted]]
You've been bitten by FAQ 7.31: Why doesn't R think these numbers are equal? https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f Your boundaries and your data values are not what you think they are. This is a limitation of digital computing, not R. > print(seq(from=.6, to=2.2, by=.2), digits=17) [1] 0.59999999999999998 0.80000000000000004 1.00000000000000000 1.20000000000000018 [5] 1.39999999999999991 1.60000000000000009 1.80000000000000027 2.00000000000000000 [9] 2.20000000000000018 > print(dat, digits=17) [1] 0.59999999999999998 0.59999999999999998 0.59999999999999998 0.69999999999999996 [5] 0.69999999999999996 0.69999999999999996 0.69999999999999996 0.69999999999999996 [9] 0.80000000000000004 0.80000000000000004 0.80000000000000004 0.90000000000000002 [13] 0.90000000000000002 0.90000000000000002 0.90000000000000002 1.00000000000000000 [17] 1.00000000000000000 1.00000000000000000 1.00000000000000000 1.10000000000000009 [21] 1.10000000000000009 1.10000000000000009 1.19999999999999996 1.19999999999999996 [25] 1.19999999999999996 1.19999999999999996 1.30000000000000004 1.30000000000000004 [29] 1.30000000000000004 1.39999999999999991 1.39999999999999991 1.39999999999999991 [33] 1.50000000000000000 1.50000000000000000 1.50000000000000000 1.60000000000000009 [37] 1.60000000000000009 1.69999999999999996 1.69999999999999996 1.69999999999999996 [41] 1.69999999999999996 1.80000000000000004 1.80000000000000004 1.80000000000000004 [45] 1.89999999999999991 1.89999999999999991 2.00000000000000000 2.00000000000000000 [49] 2.00000000000000000 2.00000000000000000 2.00000000000000000 2.10000000000000009 The simplest solution is to subtract a bit.
This also means you don't need the include.lowest= or right= arguments: > f <- cut(dat, + breaks= seq(from=.6-.01, to=2.2-.01, by=.2), + dig.lab=10L) > as.matrix(tb <- table(f)) [,1] [0.59,0.79) 8 [0.79,0.99) 7 [0.99,1.19) 7 [1.19,1.39) 7 [1.39,1.59) 6 [1.59,1.79) 6 [1.79,1.99) 5 [1.99,2.19] 6 ---------------------------------------- David L Carlson Department of Anthropology Texas A&M University College Station, TX 77843-4352 -----Original Message----- From: R-help <r-help-bounces at r-project.org> On Behalf Of Jose Claudio Faria Sent: Monday, September 24, 2018 9:32 AM To: r-help at r-project.org Subject: [R] cut{base}: is it a bug? Dears members, Is the below a bug of the cut {base} function? dat <- c( 0.6, 0.6, 0.6, 0.7, 0.7, 0.7, 0.7, 0.7, #(8) 0.8, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9, #(7) 1.0, 1.0, 1.0, 1.0, 1.1, 1.1, 1.1, #(7) 1.2, 1.2, 1.2, 1.2, 1.3, 1.3, 1.3, #(7) 1.4, 1.4, 1.4, 1.5, 1.5, 1.5, #(6) 1.6, 1.6, 1.7, 1.7, 1.7, 1.7, #(6) 1.8, 1.8, 1.8, 1.9, 1.9, #(5) 2.0, 2.0, 2.0, 2.0, 2.0, 2.1 #(6) ) # making class from function "cut" (f <- cut(dat, breaks= seq(from=.6, to=2.2, by=.2), include.lowest=TRUE, dig.lab=10L, right=FALSE)) # more easy to see the table as.matrix(tb <- table(f)) # Checking print(length(dat[dat >= 0.6 & dat < 0.8])) == tb[1] print(length(dat[dat >= 0.8 & dat < 1.0])) == tb[2] print(length(dat[dat >= 1.0 & dat < 1.2])) == tb[3] # !? print(length(dat[dat >= 1.2 & dat < 1.4])) == tb[4] # !? print(length(dat[dat >= 1.4 & dat < 1.6])) == tb[5] print(length(dat[dat >= 1.6 & dat < 1.8])) == tb[6] # !? print(length(dat[dat >= 1.8 & dat < 2.0])) == tb[7] # !?
print(length(dat[dat >= 2.0 & dat < 2.2])) == tb[8] Best, ///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ Jose Claudio Faria UESC/DCET/Brasil joseclaudio.faria at gmail.com Telefones: 55(73)3680.5545 - UESC 55(73)99966.9100 - VIVO 55(73)98817.6159 - OI ///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ If you have software to deal with statistics, you have arms; if you have good software, you have arms and legs; if you have software like R, you have arms, legs and wings... the height of your flight depends only on you! [[alternative HTML version deleted]] ______________________________________________ R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.
"Subtracting a bit" only fixes the problem for the test data... it introduces a bias in any continuous data you happen to throw at it. However, if you have data with known rounding applied (e.g. published tabular data) then the subtracting trick can be useful. In general you should not expect floating point fractions to behave like exact values in your analysis. On September 24, 2018 8:14:09 AM PDT, David L Carlson <dcarlson at tamu.edu> wrote:>You've been bitten by FAQ 7.31: Why doesn't R think these numbers are >equal? >https://cran.r-project.org/doc/FAQ/R-FAQ.html#Why-doesn_0027t-R-think-these-numbers-are-equal_003f > >Your boundaries and your data values are not what you think they are. >This is a limitation of digital computing not R. > >> print(seq(from=.6, to=2.2, by=.2), digits=17) >[1] 0.59999999999999998 0.80000000000000004 1.00000000000000000 >1.20000000000000018 >[5] 1.39999999999999991 1.60000000000000009 1.80000000000000027 >2.00000000000000000 >[9] 2.20000000000000018 > >> print(dat, digits=17) >[1] 0.59999999999999998 0.59999999999999998 0.59999999999999998 >0.69999999999999996 >[5] 0.69999999999999996 0.69999999999999996 0.69999999999999996 >0.69999999999999996 >[9] 0.80000000000000004 0.80000000000000004 0.80000000000000004 >0.90000000000000002 >[13] 0.90000000000000002 0.90000000000000002 0.90000000000000002 >1.00000000000000000 >[17] 1.00000000000000000 1.00000000000000000 1.00000000000000000 >1.10000000000000009 >[21] 1.10000000000000009 1.10000000000000009 1.19999999999999996 >1.19999999999999996 >[25] 1.19999999999999996 1.19999999999999996 1.30000000000000004 >1.30000000000000004 >[29] 1.30000000000000004 1.39999999999999991 1.39999999999999991 >1.39999999999999991 >[33] 1.50000000000000000 1.50000000000000000 1.50000000000000000 >1.60000000000000009 >[37] 1.60000000000000009 1.69999999999999996 1.69999999999999996 >1.69999999999999996 >[41] 1.69999999999999996 1.80000000000000004 1.80000000000000004 >1.80000000000000004 >[45] 
1.89999999999999991 1.89999999999999991 2.00000000000000000 >2.00000000000000000 >[49] 2.00000000000000000 2.00000000000000000 2.00000000000000000 >2.10000000000000009 > >The simplest solution is to subtract a bit. This also means you don't >need the include.lowest= or right= arguments: > >> f <- cut(dat, >+ breaks= seq(from=.6-.01, to=2.2-.01, by=.2), >+ dig.lab=10L) >> as.matrix(tb <- table(f)) > [,1] >[0.59,0.79) 8 >[0.79,0.99) 7 >[0.99,1.19) 7 >[1.19,1.39) 7 >[1.39,1.59) 6 >[1.59,1.79) 6 >[1.79,1.99) 5 >[1.99,2.19] 6 > >---------------------------------------- >David L Carlson >Department of Anthropology >Texas A&M University >College Station, TX 77843-4352 > > >-----Original Message----- >From: R-help <r-help-bounces at r-project.org> On Behalf Of Jose Claudio >Faria >Sent: Monday, September 24, 2018 9:32 AM >To: r-help at r-project.org >Subject: [R] cut{base}: is it a bug? > >Dears members, > >Is the below a bug of the cut {base} function? > >dat <- c( > 0.6, 0.6, 0.6, 0.7, 0.7, 0.7, 0.7, 0.7, #(8) > 0.8, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9, #(7) > 1.0, 1.0, 1.0, 1.0, 1.1, 1.1, 1.1, #(7) > 1.2, 1.2, 1.2, 1.2, 1.3, 1.3, 1.3, #(7) > 1.4, 1.4, 1.4, 1.5, 1.5, 1.5, #(6) > 1.6, 1.6, 1.7, 1.7, 1.7, 1.7, #(6) > 1.8, 1.8, 1.8, 1.9, 1.9, #(5) > 2.0, 2.0, 2.0, 2.0, 2.0, 2.1 #(6) > ) > ># making class from function "cut" >(f <- cut(dat, > breaks= seq(from=.6, to=2.2, by=.2), > include.lowest=TRUE, > dig.lab=10L, > right=FALSE)) > ># more easy to see the table >as.matrix(tb <- table(f)) > ># Checking >print(length(dat[dat >= 0.6 & dat < 0.8])) == tb[1] >print(length(dat[dat >= 0.8 & dat < 1.0])) == tb[2] >print(length(dat[dat >= 1.0 & dat < 1.2])) == tb[3] # !? >print(length(dat[dat >= 1.2 & dat < 1.4])) == tb[4] # !? >print(length(dat[dat >= 1.4 & dat < 1.6])) == tb[5] >print(length(dat[dat >= 1.6 & dat < 1.8])) == tb[6] # !? >print(length(dat[dat >= 1.8 & dat < 2.0])) == tb[7] # !? 
>print(length(dat[dat >= 2.0 & dat < 2.2])) == tb[8] > >Best, >///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ >Jose Claudio Faria >UESC/DCET/Brasil >joseclaudio.faria at gmail.com >Telefones: >55(73)3680.5545 - UESC >55(73)99966.9100 - VIVO >55(73)98817.6159 - OI >///\\\///\\\///\\\///\\\///\\\///\\\///\\\///\\\ > >If you have software to deal with statistics, you have arms; if you >have good software, you have arms and legs; if you have software like >R, you have arms, legs and wings... >the height of your flight depends only on you! > > [[alternative HTML version deleted]] > >______________________________________________ >R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code. > >______________________________________________ >R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code. -- Sent from my phone. Please excuse my brevity.