Dear Melissa, It seems strange to me that your code would work on any platform (it doesn't on my Mac) because the data frame you create shouldn't contain a matrix named "X" but rather columns including those originating from X. To illustrate: > X <- matrix(1:12, 4, 3) > colnames(X) <- c("a", "b", "c") > X a b c [1,] 1 5 9 [2,] 2 6 10 [3,] 3 7 11 [4,] 4 8 12 > y <- 1:4 > (D <- data.frame(y, X)) y a b c 1 1 1 5 9 2 2 2 6 10 3 3 3 7 11 4 4 4 8 12 > str(D) 'data.frame': 4 obs. of 4 variables: $ y: int 1 2 3 4 $ a: int 1 2 3 4 $ b: int 5 6 7 8 $ c: int 9 10 11 12 My session info: > sessionInfo() R version 4.1.2 (2021-11-01) Platform: aarch64-apple-darwin20 (64-bit) Running under: macOS Monterey 12.1 Matrix products: default LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] nlme_3.1-153 HRW_1.0-5 loaded via a namespace (and not attached): [1] compiler_4.1.2 tools_4.1.2 KernSmooth_2.23-20 splines_4.1.2 [5] grid_4.1.2 lattice_0.20-45 I hope this helps, John -- John Fox, Professor Emeritus McMaster University Hamilton, Ontario, Canada web: https://socialsciences.mcmaster.ca/jfox/ On 2022-01-07 11:23 a.m., Key, Melissa wrote:> I am trying to replicate a semi-parametric analysis described in Harezlak, Jaroslaw, David Ruppert, and Matt P. Wand. Semiparametric regression with R. New York, NY: Springer, 2018. (https://link.springer.com/book/10.1007%2F978-1-4939-8853-2). > > I can successfully run the analysis, but now I'm trying to move it into my workflow, which requires that the analysis be conducted within a function (using targets package), and the `groupedData` function now fails with an error that it cannot find the `X` matrix (see reprex below). I've tried the reprex on both my personal Mac (where it works??) and on windows machines (where it does not) - so the problem is likely specific to Windows computers (yes, this seems weird to me too). > All packages have been updated, and I'm running the latest version of R on all machines. > > Reprex: > > library(HRW) # contains example data and ZOSull function > library(nlme) > > data(growthIndiana) > > > analyze_this <- function(df) { > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) > dummyID <- rep(1, length(nrow(X))) > > grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), data = data.frame(y = df$y, X, dummyID)) > > } > > > # doesn't work on Windows machine, does work on the Mac > analyze_this(growthIndiana) > #> Error in eval(aux[[2]], object): object 'X' not found > > # does work > > df <- growthIndiana > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) > dummyID <- rep(1, length(nrow(X))) > > grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), data = data.frame(y = df$y, X, dummyID)) > > > # attempted work-around. > > analyze_this2 <- function(df) { > num.global.knots = 20 > num.subject.knots = 10 > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) > dummyID <- rep(1, length(nrow(X))) > > # grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), data = data.frame(y = df$y, X, dummyID)) > > global.knots = quantile(unique(df$x), seq(0, 1, length = num.global.knots + 2)[-c(1, num.global.knots + 2)]) > subject.knots = quantile(unique(df$x), seq(0, 1, length = num.subject.knots + 2)[-c(1, num.subject.knots + 2)]) > > Z.global <- ZOSull(df$x, range.x = range(df$x), global.knots) > Z.group <- df$black * Z.global > Z.subject <- ZOSull(df$x, range.x = range(df$x), subject.knots) > > Zblock <- list( > dummyID = pdIdent(~ 0 + Z.global), > dummyID = pdIdent(~ 0 + Z.group), > idnum = pdSymm(~ x), > idnum = pdIdent(~ 0 + Z.subject) > ) > > df$dummyID <- dummyID > tmp_data <- c( > df, > X = list(X), > Z.global = list(Z.global), > Z.group = list(Z.global), > Z.subject = list(Z.subject) > ) > > fit <- lme(y ~ 0 + X, > data = tmp_data, > random = Zblock > ) > > } > > # this works (warning - lme takes awhile to fit) > analyze_this2(growthIndiana) > > sessionInfo() > #> R version 4.1.2 (2021-11-01) > #> Platform: x86_64-w64-mingw32/x64 (64-bit) > #> Running under: Windows 10 x64 (build 22000) > #> > #> Matrix products: default > #> > #> locale: > #> [1] LC_COLLATE=English_United States.1252 > #> [2] LC_CTYPE=English_United States.1252 > #> [3] LC_MONETARY=English_United States.1252 > #> [4] LC_NUMERIC=C > #> [5] LC_TIME=English_United States.1252 > #> > #> attached base packages: > #> [1] stats graphics grDevices utils datasets methods base > #> > #> other attached packages: > #> [1] nlme_3.1-153 HRW_1.0-5 > #> > #> loaded via a namespace (and not attached): > #> [1] lattice_0.20-45 digest_0.6.29 withr_2.4.3 grid_4.1.2 > #> [5] magrittr_2.0.1 reprex_2.0.1 evaluate_0.14 KernSmooth_2.23-20 > #> [9] highr_0.9 stringi_1.7.6 rlang_0.4.12 cli_3.1.0 > #> [13] rstudioapi_0.13 fs_1.5.2 rmarkdown_2.11 splines_4.1.2 > #> [17] tools_4.1.2 stringr_1.4.0 glue_1.6.0 xfun_0.29 > #> [21] yaml_2.2.1 fastmap_1.1.0 compiler_4.1.2 htmltools_0.5.2 > #> [25] knitr_1.37 > > Created on 2022-01-07 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1) > > Here's the session Info for the Mac: > > sessionInfo() > R version 4.1.2 (2021-11-01) > Platform: aarch64-apple-darwin20 (64-bit) > Running under: macOS Monterey 12.1 > > Matrix products: default > LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib > > locale:[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 > attached base packages: > [1] stats graphics grDevices utils datasets methods base > > other attached packages: > [1] nlme_3.1-153 targets_0.8.1 HRW_1.0-5 > > loaded via a namespace (and not attached): > [1] compiler_4.1.2 pillar_1.6.4 tools_4.1.2 digest_0.6.28 lattice_0.20-45 jsonlite_1.7.2 evaluate_0.14 > [8] lifecycle_1.0.1 tibble_3.1.6 pkgconfig_2.0.3 rlang_0.4.12 igraph_1.2.9 cli_3.1.0 yaml_2.2.1 > [15] xfun_0.28 fastmap_1.1.0 withr_2.4.2 knitr_1.36 htmlwidgets_1.5.4 vctrs_0.3.8 grid_4.1.2 > [22] tidyselect_1.1.1 glue_1.5.0 data.table_1.14.2 R6_2.5.1 processx_3.5.2 fansi_0.5.0 rmarkdown_2.11 > [29] callr_3.7.0 purrr_0.3.4 magrittr_2.0.1 scales_1.1.1 codetools_0.2-18 ps_1.6.0 htmltools_0.5.2 > [36] ellipsis_0.3.2 splines_4.1.2 colorspace_2.0-2 KernSmooth_2.23-20 utf8_1.2.2 visNetwork_2.1.0 munsell_0.5.0 > [43] crayon_1.4.2 > > > Melissa Key, Ph.D. > Statistician > Non-Invasive Brain Stimulation Team > Infoscitex > > Cell: 937-550-4981 > Email: mkey at infoscitex.com<mailto:mkey at infoscitex.com> > Base email: melissa.key.ctr at us.af.mil<mailto:melissa.key.ctr at us.af.mil> > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.
Key, Melissa
2022-Jan-07 22:29 UTC
[R] [EXTERNAL] Re: bug in Windows implementation of nlme::groupedData
John, Thanks for your response. I agree that the definition of the data frame is poor (in my defense it came directly from the demo code, but I should have checked it more thoroughly). The good news is that your comments caused me to take a closer look at where X was defined, and I found the reason I wasn't getting the same results on my Mac and PC - that error was between keyboard and chair. There is still something funny going on though (at least relative to my previous experience with how R searches environments: If X is defined in the global environment, groupedData can find it there and use it. (this is what I'm used to) If X is defined within a function, groupedData cannot find it, even if groupedData is called within the same function. (this seems strange to me - usually parent.frame() captures information within the function environment, or so I thought) My solution at the bottom still works - and unlike groupedData, nlme allows a list as input to the data argument (or at least, doesn't check to make sure it's a data frame), so I have a working (albeit hacky) solution that actually makes more sense to me than using groupedData, but it still seems strange that the function cannot find X in its search path. Thanks again! Melissa -----Original Message----- From: John Fox <jfox at mcmaster.ca> Sent: Friday, January 7, 2022 4:35 PM To: Key, Melissa <mkey at infoscitex.com> Cc: r-help at r-project.org Subject: [EXTERNAL] Re: [R] bug in Windows implementation of nlme::groupedData Dear Melissa, It seems strange to me that your code would work on any platform (it doesn't on my Mac) because the data frame you create shouldn't contain a matrix named "X" but rather columns including those originating from X. To illustrate: > X <- matrix(1:12, 4, 3) > colnames(X) <- c("a", "b", "c") > X a b c [1,] 1 5 9 [2,] 2 6 10 [3,] 3 7 11 [4,] 4 8 12 > y <- 1:4 > (D <- data.frame(y, X)) y a b c 1 1 1 5 9 2 2 2 6 10 3 3 3 7 11 4 4 4 8 12 > str(D) 'data.frame': 4 obs. of 4 variables: $ y: int 1 2 3 4 $ a: int 1 2 3 4 $ b: int 5 6 7 8 $ c: int 9 10 11 12 My session info: > sessionInfo() R version 4.1.2 (2021-11-01) Platform: aarch64-apple-darwin20 (64-bit) Running under: macOS Monterey 12.1 Matrix products: default LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] nlme_3.1-153 HRW_1.0-5 loaded via a namespace (and not attached): [1] compiler_4.1.2 tools_4.1.2 KernSmooth_2.23-20 splines_4.1.2 [5] grid_4.1.2 lattice_0.20-45 I hope this helps, John -- John Fox, Professor Emeritus McMaster University Hamilton, Ontario, Canada web: https://usg02.safelinks.protection.office365.us/?url=https%3A%2F%2Fsocialsciences.mcmaster.ca%2Fjfox%2F&data=04%7C01%7Cmkey%40infoscitex.com%7C57e0067b32a24526600c08d9d2258bd8%7C3430c5de5e6f4f9d9d9690e985038b58%7C0%7C0%7C637771881005380638%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=nTXsf6vbxVIstMup5AUlmABv9CdQa3hw44Fdb2lej7A%3D&reserved=0 On 2022-01-07 11:23 a.m., Key, Melissa wrote:> I am trying to replicate a semi-parametric analysis described in Harezlak, Jaroslaw, David Ruppert, and Matt P. Wand. Semiparametric regression with R. New York, NY: Springer, 2018. (https://usg02.safelinks.protection.office365.us/?url=https%3A%2F%2Flink.springer.com%2Fbook%2F10.1007%252F978-1-4939-8853-2&data=04%7C01%7Cmkey%40infoscitex.com%7C57e0067b32a24526600c08d9d2258bd8%7C3430c5de5e6f4f9d9d9690e985038b58%7C0%7C0%7C637771881005380638%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=ESCv%2Bp%2FPo7pyKm055Y4nYAK8Cj1RTkorP9ZvahZTPmE%3D&reserved=0). > > I can successfully run the analysis, but now I'm trying to move it into my workflow, which requires that the analysis be conducted within a function (using targets package), and the `groupedData` function now fails with an error that it cannot find the `X` matrix (see reprex below). I've tried the reprex on both my personal Mac (where it works??) and on windows machines (where it does not) - so the problem is likely specific to Windows computers (yes, this seems weird to me too). > All packages have been updated, and I'm running the latest version of R on all machines. > > Reprex: > > library(HRW) # contains example data and ZOSull function > library(nlme) > > data(growthIndiana) > > > analyze_this <- function(df) { > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) > dummyID <- rep(1, length(nrow(X))) > > grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), > data = data.frame(y = df$y, X, dummyID)) > > } > > > # doesn't work on Windows machine, does work on the Mac > analyze_this(growthIndiana) > #> Error in eval(aux[[2]], object): object 'X' not found > > # does work > > df <- growthIndiana > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) dummyID <- rep(1, > length(nrow(X))) > > grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), data > = data.frame(y = df$y, X, dummyID)) > > > # attempted work-around. > > analyze_this2 <- function(df) { > num.global.knots = 20 > num.subject.knots = 10 > > mean.x <- mean(df$age) > mean.y <- mean(df$height) > sd.x <- sd(df$age) > sd.y <- sd(df$height) > > df$x <- (df$age - mean.x) / sd.x > df$y <- (df$height - mean.y) / sd.y > > X <- model.matrix(~ x * male * black, data = df) > dummyID <- rep(1, length(nrow(X))) > > # grouped_data <- groupedData(y ~ X[,-1]|rep(1, length = nrow(X)), > data = data.frame(y = df$y, X, dummyID)) > > global.knots = quantile(unique(df$x), seq(0, 1, length = num.global.knots + 2)[-c(1, num.global.knots + 2)]) > subject.knots = quantile(unique(df$x), seq(0, 1, length = > num.subject.knots + 2)[-c(1, num.subject.knots + 2)]) > > Z.global <- ZOSull(df$x, range.x = range(df$x), global.knots) > Z.group <- df$black * Z.global > Z.subject <- ZOSull(df$x, range.x = range(df$x), subject.knots) > > Zblock <- list( > dummyID = pdIdent(~ 0 + Z.global), > dummyID = pdIdent(~ 0 + Z.group), > idnum = pdSymm(~ x), > idnum = pdIdent(~ 0 + Z.subject) > ) > > df$dummyID <- dummyID > tmp_data <- c( > df, > X = list(X), > Z.global = list(Z.global), > Z.group = list(Z.global), > Z.subject = list(Z.subject) > ) > > fit <- lme(y ~ 0 + X, > data = tmp_data, > random = Zblock > ) > > } > > # this works (warning - lme takes awhile to fit) > analyze_this2(growthIndiana) > > sessionInfo() > #> R version 4.1.2 (2021-11-01) > #> Platform: x86_64-w64-mingw32/x64 (64-bit) #> Running under: Windows > 10 x64 (build 22000) #> #> Matrix products: default #> #> locale: > #> [1] LC_COLLATE=English_United States.1252 #> [2] > LC_CTYPE=English_United States.1252 #> [3] LC_MONETARY=English_United > States.1252 #> [4] LC_NUMERIC=C #> [5] LC_TIME=English_United > States.1252 #> #> attached base packages: > #> [1] stats graphics grDevices utils datasets methods base > #> > #> other attached packages: > #> [1] nlme_3.1-153 HRW_1.0-5 > #> > #> loaded via a namespace (and not attached): > #> [1] lattice_0.20-45 digest_0.6.29 withr_2.4.3 grid_4.1.2 > #> [5] magrittr_2.0.1 reprex_2.0.1 evaluate_0.14 KernSmooth_2.23-20 > #> [9] highr_0.9 stringi_1.7.6 rlang_0.4.12 cli_3.1.0 > #> [13] rstudioapi_0.13 fs_1.5.2 rmarkdown_2.11 splines_4.1.2 > #> [17] tools_4.1.2 stringr_1.4.0 glue_1.6.0 xfun_0.29 > #> [21] yaml_2.2.1 fastmap_1.1.0 compiler_4.1.2 htmltools_0.5.2 > #> [25] knitr_1.37 > > Created on 2022-01-07 by the [reprex > package](https://usg02.safelinks.protection.office365.us/?url=https%3A > %2F%2Freprex.tidyverse.org%2F&data=04%7C01%7Cmkey%40infoscitex.com > %7C57e0067b32a24526600c08d9d2258bd8%7C3430c5de5e6f4f9d9d9690e985038b58 > %7C0%7C0%7C637771881005380638%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw > MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=hH > 5ncnSVZGQ4KZsvLpvjgM%2Fgf9Zu5QnoeSntTrCZWjk%3D&reserved=0) > (v2.0.1) > > Here's the session Info for the Mac: > > sessionInfo() > R version 4.1.2 (2021-11-01) > Platform: aarch64-apple-darwin20 (64-bit) Running under: macOS > Monterey 12.1 > > Matrix products: default > LAPACK: > /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRl > apack.dylib > > locale:[1] > en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 > attached base packages: > [1] stats graphics grDevices utils datasets methods base > > other attached packages: > [1] nlme_3.1-153 targets_0.8.1 HRW_1.0-5 > > loaded via a namespace (and not attached): > [1] compiler_4.1.2 pillar_1.6.4 tools_4.1.2 digest_0.6.28 lattice_0.20-45 jsonlite_1.7.2 evaluate_0.14 > [8] lifecycle_1.0.1 tibble_3.1.6 pkgconfig_2.0.3 rlang_0.4.12 igraph_1.2.9 cli_3.1.0 yaml_2.2.1 > [15] xfun_0.28 fastmap_1.1.0 withr_2.4.2 knitr_1.36 htmlwidgets_1.5.4 vctrs_0.3.8 grid_4.1.2 > [22] tidyselect_1.1.1 glue_1.5.0 data.table_1.14.2 R6_2.5.1 processx_3.5.2 fansi_0.5.0 rmarkdown_2.11 > [29] callr_3.7.0 purrr_0.3.4 magrittr_2.0.1 scales_1.1.1 codetools_0.2-18 ps_1.6.0 htmltools_0.5.2 > [36] ellipsis_0.3.2 splines_4.1.2 colorspace_2.0-2 KernSmooth_2.23-20 utf8_1.2.2 visNetwork_2.1.0 munsell_0.5.0 > [43] crayon_1.4.2 > > > Melissa Key, Ph.D. > Statistician > Non-Invasive Brain Stimulation Team > Infoscitex > > Cell: 937-550-4981 > Email: mkey at infoscitex.com<mailto:mkey at infoscitex.com> > Base email: > melissa.key.ctr at us.af.mil<mailto:melissa.key.ctr at us.af.mil> > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://usg02.safelinks.protection.office365.us/?url=https%3A%2F%2Fsta > t.ethz.ch%2Fmailman%2Flistinfo%2Fr-help&data=04%7C01%7Cmkey%40info > scitex.com%7C57e0067b32a24526600c08d9d2258bd8%7C3430c5de5e6f4f9d9d9690 > e985038b58%7C0%7C0%7C637771881005380638%7CUnknown%7CTWFpbGZsb3d8eyJWIj > oiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&am > p;sdata=4c%2Bnu06ZfIKpwowJCTz2mIl7kAEQR5vbZSw8pTuqumk%3D&reserved> 0 PLEASE do read the posting guide > https://usg02.safelinks.protection.office365.us/?url=http%3A%2F%2Fwww. > r-project.org%2Fposting-guide.html&data=04%7C01%7Cmkey%40infoscite > x.com%7C57e0067b32a24526600c08d9d2258bd8%7C3430c5de5e6f4f9d9d9690e9850 > 38b58%7C0%7C0%7C637771881005380638%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4 > wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sda > ta=iT8fu9JX5i7gUfo0T2gQMVdHEm%2FSThakp2yaOJ2pOBQ%3D&reserved=0 > and provide commented, minimal, self-contained, reproducible code.