Hola, quiero hacer una regresión lineal lm(y ~ x * grupo, data =datos) y: numérica, x: numérica, grupo: factor con dos niveles (1 y 2) pero no calcula el coeficiente de x:grupo2 a cuenta de una singularidad Coefficients: (1 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** x 9.696e-04 1.720e-05 56.359 < 2e-16 *** grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** x:grupo2 NA NA NA NA Sin embargo, si lo hago paso a paso obtengo el coeficiente sin ningún problema. est sd tstat prt -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 ¿Alguna sugerencia? Gracias Emilio PD. Aquí va el código. rm(list=ls()) datos <- structure(list(x = c(1322988541, 1322988561, 1322988571, 1322988581, 1322988591, 1322988601, 1322988631, 1322988641, 1322988671, 1322988691, 1322988701, 1322988741, 1322988851, 1322988861, 1322988881, 1322988901, 1322988921, 1322988941, 1322988971, 1322989031, 1322989051, 1322989101, 1322989145, 1322989155, 1322989165, 1322989195, 1322989205, 1322989225, 1322989255, 1322989285, 1322989305, 1322989325, 1322989335, 1322989345, 1322989385, 1322989395, 1322989425, 1322989465, 1322989515, 1322989545, 1322989565, 1322989585, 1322989635, 1322989645, 1322989655, 1322989735, 1322989775, 1322989845, 1322989965, 1322990015, 1322990065, 1322990125, 1322990165, 1322990325, 1322990375, 1322990395, 1322990435, 1322990475, 1322990495, 1322990565, 1322990585, 1322990605, 1322990615), y = c(0, 0.0331545271786949, 0.0441494615282616, 0.0588281430638927, 0.105414946889965, 0.114584495389247, 0.136280527285279, 0.222000735729392, 0.270940599915131, 0.297288944178842, 0.318393064910918, 0.355511383046162, 0.523060399246142, 0.533111548416975, 0.564842299095897, 0.588676620944726, 0.611545644320544, 0.634421359300242, 0.666313521634898, 0.720167541778452, 0.73305715343862, 0.771462932299023, 0.786787376834878, 0.822455110136009, 0.842948522760745, 0.89054108302027, 0.896418224686584, 0.912860950728539, 0.948749247273654, 0.968819427542235, 0.994936619570842, 1.01262016683246, 1.02718993008829, 1.03825902504014, 1.07121071404195, 1.08254211763514, 1.09702138199726, 1.12365971853979, 1.18661502223515, 1.19851974419895, 1.21335779153356, 1.23353641854298, 1.27276015781082, 1.28550028269269, 1.30824810251295, 1.33422806471771, 1.36068254611751, 1.41264023191542, 1.55647787533637, 1.62241537222118, 1.68843510799368, 1.76907947225848, 1.81924949620336, 2.01387957987011, 2.06261906430172, 2.07163544563991, 2.09305303167851, 2.11268774697497, 2.12451660254162, 2.13053196371228, 2.1530958080546, 2.17003590901411, 2.18029803192084), grupo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor")), .Names = c("x", "y", "grupo"), row.names = c(NA, -63L), class = "data.frame") modelo <- lm(y~x*grupo, data =datos) summary(modelo) ## el coeficiente x:grupo2 no funciona ## Coefficients: (1 not defined because of singularities) ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** ## x 9.696e-04 1.720e-05 56.359 < 2e-16 *** ## grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** ## x:grupo2 NA NA NA NA sacarelcoeficienteamano <- function(dat) { fits <- lapply(split(dat,dat$grupo),lm,formula=y~x) sums <- lapply(fits,summary) coefs <- lapply(sums,coef) db <- coefs[[2]]["x","Estimate"]-coefs[[1]]["x","Estimate"] sd <- sqrt(sum(sapply(coefs,function(x) x["x","Std. Error"])^2)) df <- sum(sapply(fits,"[[","df.residual")) td <- db/sd c(est=db,sd=sd,tstat=td,prt=2*pt(-abs(td),df)) } sacarelcoeficienteamano(datos) ## est sd tstat prt ## -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 library(ggplot2) ggplot() + geom_point(aes(x=x, y=y, color=grupo), data =datos) + geom_smooth(aes(x=x, y=y, group=grupo), method = "lm", data = datos) sessionInfo() ## R version 2.14.2 (2012-02-29) ## Platform: i486-pc-linux-gnu (32-bit) ## locale: ## [1] LC_CTYPE=es_ES.UTF-8 LC_NUMERIC=C ## [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=es_ES.UTF-8 ## [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=es_ES.UTF-8 ## [7] LC_PAPER=C LC_NAME=C ## [9] LC_ADDRESS=C LC_TELEPHONE=C ## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## other attached packages: ## [1] ggplot2_0.9.0 ## loaded via a namespace (and not attached): ## [1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.1 grid_2.14.2 ## [5] MASS_7.3-16 memoise_0.1 munsell_0.3 plyr_1.7.1 ## [9] proto_0.3-9.2 RColorBrewer_1.0-5 reshape2_1.2.1 scales_0.2.0 ## [13] stringr_0.6 -- ------------------------------------------------- Emilio Torres Manzanera Fac. de Comercio - Universidad de Oviedo c/ Luis Moya 261, E-33203 Gijón (Spain) Tel. 985 182 197 email: torres en uniovi.es
En los modelos lineales, cuando introduces una variable de clasificación con K-niveles, estas se mapea internamente en (k-1) varaibles indicadoras (0-1). Por lo tanto si tienes dos grupos solo aparecerá uno de ellos en la lista de parámetros. Para el modelo que quieres ajustar, puedes forzar a que aparezca la estimación del segundo grupo eliminando la ordenada al origen. Esta es substituida por dos ordenadas al origen, una para cada grupo. La instrucción es simple y consiste en agregar un *-1.* lm(y ~ x * grupo*-1*, data =datos) En la practica habitual tienes que acostumbrarte a leer la salida sin forzar a la eliminación de la constante porque esta forma de reparametrizar el modelo es sólo viable para modelos sencillos. Si comparas el modelo estimado agregando el -1 con el modelo sin el, veras que la ordenada al origen del modelo sin el -1 es la media del grupo correspondiente al coeficiente omitido Prof. Julio Di Rienzo Estadística y Biometría FCA- U.N. Córdoba IBS-RARG President sites.google.com/site/juliodirienzo "Biometry, the active pursuit of biological knowledge by quantitative methods." (R.A. Fisher, 1948) 2012/3/31 Emilio Torres Manzanera <torres@uniovi.es>> > > > Hola, > quiero hacer una regresión lineal > lm(y ~ x * grupo, data =datos) > y: numérica, x: numérica, grupo: factor con dos niveles (1 y 2) > pero no calcula el coeficiente de x:grupo2 a cuenta de una singularidad > > Coefficients: (1 not defined because of singularities) > Estimate Std. Error t value Pr(>|t|) > (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** > x 9.696e-04 1.720e-05 56.359 < 2e-16 *** > grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** > x:grupo2 NA NA NA NA > > Sin embargo, si lo hago paso a paso obtengo el coeficiente sin ningún > problema. > est sd tstat prt > -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 > > ¿Alguna sugerencia? > Gracias > Emilio > PD. Aquí va el código. > > > rm(list=ls()) > > datos <- structure(list(x = c(1322988541, 1322988561, 1322988571, > 1322988581, > 1322988591, 1322988601, 1322988631, 1322988641, > 1322988671, 1322988691, > 1322988701, 1322988741, 1322988851, 1322988861, > 1322988881, 1322988901, > 1322988921, 1322988941, 1322988971, 1322989031, > 1322989051, 1322989101, > 1322989145, 1322989155, 1322989165, 1322989195, > 1322989205, 1322989225, > 1322989255, 1322989285, 1322989305, 1322989325, > 1322989335, 1322989345, > 1322989385, 1322989395, 1322989425, 1322989465, > 1322989515, 1322989545, > 1322989565, 1322989585, 1322989635, 1322989645, > 1322989655, 1322989735, > 1322989775, 1322989845, 1322989965, 1322990015, > 1322990065, 1322990125, > 1322990165, 1322990325, 1322990375, 1322990395, > 1322990435, 1322990475, > 1322990495, 1322990565, 1322990585, 1322990605, > 1322990615), > y = c(0, 0.0331545271786949, 0.0441494615282616, > 0.0588281430638927, > 0.105414946889965, 0.114584495389247, 0.136280527285279, > 0.222000735729392, 0.270940599915131, 0.297288944178842, > 0.318393064910918, 0.355511383046162, 0.523060399246142, > 0.533111548416975, 0.564842299095897, 0.588676620944726, > 0.611545644320544, 0.634421359300242, 0.666313521634898, > 0.720167541778452, 0.73305715343862, 0.771462932299023, > 0.786787376834878, > 0.822455110136009, 0.842948522760745, 0.89054108302027, > 0.896418224686584, > 0.912860950728539, 0.948749247273654, 0.968819427542235, > 0.994936619570842, 1.01262016683246, 1.02718993008829, > 1.03825902504014, > 1.07121071404195, 1.08254211763514, 1.09702138199726, > 1.12365971853979, > 1.18661502223515, 1.19851974419895, 1.21335779153356, > 1.23353641854298, > 1.27276015781082, 1.28550028269269, 1.30824810251295, > 1.33422806471771, > 1.36068254611751, 1.41264023191542, 1.55647787533637, > 1.62241537222118, > 1.68843510799368, 1.76907947225848, 1.81924949620336, > 2.01387957987011, > 2.06261906430172, 2.07163544563991, 2.09305303167851, > 2.11268774697497, > 2.12451660254162, 2.13053196371228, 2.1530958080546, > 2.17003590901411, > 2.18029803192084), grupo = structure(c(1L, 1L, 1L, 1L, > 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L), .Label = c("1", > > "2"), class = "factor")), .Names = c("x", "y", > "grupo"), row.names = c(NA, > > > -63L), class = "data.frame") > > > modelo <- lm(y~x*grupo, data =datos) > summary(modelo) ## el coeficiente x:grupo2 no funciona > > ## Coefficients: (1 not defined because of singularities) > ## Estimate Std. Error t value Pr(>|t|) > ## (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** > ## x 9.696e-04 1.720e-05 56.359 < 2e-16 *** > ## grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** > ## x:grupo2 NA NA NA NA > > sacarelcoeficienteamano <- function(dat) { > fits <- lapply(split(dat,dat$grupo),**lm,formula=y~x) > sums <- lapply(fits,summary) > coefs <- lapply(sums,coef) > db <- coefs[[2]]["x","Estimate"]-**coefs[[1]]["x","Estimate"] > sd <- sqrt(sum(sapply(coefs,**function(x) x["x","Std. Error"])^2)) > df <- sum(sapply(fits,"[[","df.**residual")) > td <- db/sd > c(est=db,sd=sd,tstat=td,prt=2***pt(-abs(td),df)) > } > > sacarelcoeficienteamano(datos) > ## est sd tstat prt > ## -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 > > library(ggplot2) > ggplot() + geom_point(aes(x=x, y=y, color=grupo), data =datos) + > geom_smooth(aes(x=x, y=y, group=grupo), method = "lm", data = datos) > > > sessionInfo() > ## R version 2.14.2 (2012-02-29) > ## Platform: i486-pc-linux-gnu (32-bit) > > ## locale: > ## [1] LC_CTYPE=es_ES.UTF-8 LC_NUMERIC=C > ## [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=es_ES.UTF-8 > ## [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=es_ES.UTF-8 > ## [7] LC_PAPER=C LC_NAME=C > ## [9] LC_ADDRESS=C LC_TELEPHONE=C > ## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C > > ## attached base packages: > ## [1] stats graphics grDevices utils datasets methods base > > ## other attached packages: > ## [1] ggplot2_0.9.0 > > ## loaded via a namespace (and not attached): > ## [1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.1 > grid_2.14.2 > ## [5] MASS_7.3-16 memoise_0.1 munsell_0.3 plyr_1.7.1 > ## [9] proto_0.3-9.2 RColorBrewer_1.0-5 reshape2_1.2.1 > scales_0.2.0 > ## [13] stringr_0.6 > -- > ------------------------------**------------------- > Emilio Torres Manzanera > Fac. de Comercio - Universidad de Oviedo > c/ Luis Moya 261, E-33203 Gijón (Spain) > Tel. 985 182 197 email: torres@uniovi.es > > ______________________________**_________________ > R-help-es mailing list > R-help-es@r-project.org > stat.ethz.ch/mailman/**listinfo/r-help-es<stat.ethz.ch/mailman/listinfo/r-help-es> >[[alternative HTML version deleted]]
Otra explicación es esta: r.789695.n4.nabble.com/lm-coefficient-not-defined-because-of-singularities-What-does-this-mean-td847950.html Tiene *mucho* que ver con los valores de tus datos$x... Si haces esto:> > datos$xx <- datos$x - 1322980000> > modelo <- lm(y~xx *grupo, data =datos)> summary(modelo)Call: lm(formula = y ~ xx * grupo, data = datos) Residuals: Min 1Q Median 3Q Max -0.07609 -0.02090 -0.00030 0.01852 0.07125 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -1.341e+01 4.339e-01 -30.90 <2e-16 *** xx 1.573e-03 4.970e-05 31.64 <2e-16 *** grupo2 5.559e+00 4.433e-01 12.54 <2e-16 *** xx:grupo2 -6.242e-04 5.057e-05 -12.34 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.03121 on 59 degrees of freedom Multiple R-squared: 0.9978, Adjusted R-squared: 0.9976 F-statistic: 8752 on 3 and 59 DF, p-value: < 2.2e-16>Saludos, Carlos Ortega qualityexcellence.es El 31 de marzo de 2012 21:17, Emilio Torres Manzanera <torres@uniovi.es>escribió:> > > > Hola, > quiero hacer una regresión lineal > lm(y ~ x * grupo, data =datos) > y: numérica, x: numérica, grupo: factor con dos niveles (1 y 2) > pero no calcula el coeficiente de x:grupo2 a cuenta de una singularidad > > Coefficients: (1 not defined because of singularities) > Estimate Std. Error t value Pr(>|t|) > (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** > x 9.696e-04 1.720e-05 56.359 < 2e-16 *** > grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** > x:grupo2 NA NA NA NA > > Sin embargo, si lo hago paso a paso obtengo el coeficiente sin ningún > problema. > est sd tstat prt > -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 > > ¿Alguna sugerencia? > Gracias > Emilio > PD. Aquí va el código. > > > rm(list=ls()) > > datos <- structure(list(x = c(1322988541, 1322988561, 1322988571, > 1322988581, > 1322988591, 1322988601, 1322988631, 1322988641, > 1322988671, 1322988691, > 1322988701, 1322988741, 1322988851, 1322988861, > 1322988881, 1322988901, > 1322988921, 1322988941, 1322988971, 1322989031, > 1322989051, 1322989101, > 1322989145, 1322989155, 1322989165, 1322989195, > 1322989205, 1322989225, > 1322989255, 1322989285, 1322989305, 1322989325, > 1322989335, 1322989345, > 1322989385, 1322989395, 1322989425, 1322989465, > 1322989515, 1322989545, > 1322989565, 1322989585, 1322989635, 1322989645, > 1322989655, 1322989735, > 1322989775, 1322989845, 1322989965, 1322990015, > 1322990065, 1322990125, > 1322990165, 1322990325, 1322990375, 1322990395, > 1322990435, 1322990475, > 1322990495, 1322990565, 1322990585, 1322990605, > 1322990615), > y = c(0, 0.0331545271786949, 0.0441494615282616, > 0.0588281430638927, > 0.105414946889965, 0.114584495389247, 0.136280527285279, > 0.222000735729392, 0.270940599915131, 0.297288944178842, > 0.318393064910918, 0.355511383046162, 0.523060399246142, > 0.533111548416975, 0.564842299095897, 0.588676620944726, > 0.611545644320544, 0.634421359300242, 0.666313521634898, > 0.720167541778452, 0.73305715343862, 0.771462932299023, > 0.786787376834878, > 0.822455110136009, 0.842948522760745, 0.89054108302027, > 0.896418224686584, > 0.912860950728539, 0.948749247273654, 0.968819427542235, > 0.994936619570842, 1.01262016683246, 1.02718993008829, > 1.03825902504014, > 1.07121071404195, 1.08254211763514, 1.09702138199726, > 1.12365971853979, > 1.18661502223515, 1.19851974419895, 1.21335779153356, > 1.23353641854298, > 1.27276015781082, 1.28550028269269, 1.30824810251295, > 1.33422806471771, > 1.36068254611751, 1.41264023191542, 1.55647787533637, > 1.62241537222118, > 1.68843510799368, 1.76907947225848, 1.81924949620336, > 2.01387957987011, > 2.06261906430172, 2.07163544563991, 2.09305303167851, > 2.11268774697497, > 2.12451660254162, 2.13053196371228, 2.1530958080546, > 2.17003590901411, > 2.18029803192084), grupo = structure(c(1L, 1L, 1L, 1L, > 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L), .Label = c("1", > > "2"), class = "factor")), .Names = c("x", "y", > "grupo"), row.names = c(NA, > > > -63L), class = "data.frame") > > > modelo <- lm(y~x*grupo, data =datos) > summary(modelo) ## el coeficiente x:grupo2 no funciona > > ## Coefficients: (1 not defined because of singularities) > ## Estimate Std. Error t value Pr(>|t|) > ## (Intercept) -1.283e+06 2.276e+04 -56.359 < 2e-16 *** > ## x 9.696e-04 1.720e-05 56.359 < 2e-16 *** > ## grupo2 8.940e-02 2.339e-02 3.823 0.000316 *** > ## x:grupo2 NA NA NA NA > > sacarelcoeficienteamano <- function(dat) { > fits <- lapply(split(dat,dat$grupo),**lm,formula=y~x) > sums <- lapply(fits,summary) > coefs <- lapply(sums,coef) > db <- coefs[[2]]["x","Estimate"]-**coefs[[1]]["x","Estimate"] > sd <- sqrt(sum(sapply(coefs,**function(x) x["x","Std. Error"])^2)) > df <- sum(sapply(fits,"[[","df.**residual")) > td <- db/sd > c(est=db,sd=sd,tstat=td,prt=2***pt(-abs(td),df)) > } > > sacarelcoeficienteamano(datos) > ## est sd tstat prt > ## -6.241814e-04 4.386667e-05 -1.422906e+01 9.875019e-21 > > library(ggplot2) > ggplot() + geom_point(aes(x=x, y=y, color=grupo), data =datos) + > geom_smooth(aes(x=x, y=y, group=grupo), method = "lm", data = datos) > > > sessionInfo() > ## R version 2.14.2 (2012-02-29) > ## Platform: i486-pc-linux-gnu (32-bit) > > ## locale: > ## [1] LC_CTYPE=es_ES.UTF-8 LC_NUMERIC=C > ## [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=es_ES.UTF-8 > ## [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=es_ES.UTF-8 > ## [7] LC_PAPER=C LC_NAME=C > ## [9] LC_ADDRESS=C LC_TELEPHONE=C > ## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C > > ## attached base packages: > ## [1] stats graphics grDevices utils datasets methods base > > ## other attached packages: > ## [1] ggplot2_0.9.0 > > ## loaded via a namespace (and not attached): > ## [1] colorspace_1.1-1 dichromat_1.2-4 digest_0.5.1 > grid_2.14.2 > ## [5] MASS_7.3-16 memoise_0.1 munsell_0.3 plyr_1.7.1 > ## [9] proto_0.3-9.2 RColorBrewer_1.0-5 reshape2_1.2.1 > scales_0.2.0 > ## [13] stringr_0.6 > -- > ------------------------------**------------------- > Emilio Torres Manzanera > Fac. de Comercio - Universidad de Oviedo > c/ Luis Moya 261, E-33203 Gijón (Spain) > Tel. 985 182 197 email: torres@uniovi.es > > ______________________________**_________________ > R-help-es mailing list > R-help-es@r-project.org > stat.ethz.ch/mailman/**listinfo/r-help-es<stat.ethz.ch/mailman/listinfo/r-help-es> >-- Saludos, Carlos Ortega qualityexcellence.es [[alternative HTML version deleted]]