Luigi Marongiu
2016-Feb-15 14:36 UTC
[R] non parametric algorithm npEM (library mixtools) usage
Dear all, I am using mixtols to find cut-off values from datasets. At the moment I can run the parametric function normalmixEM from the library mixtools, but I owuld like to generalize the procedure with the non parametric version npEM. However I am confused regarding the arguments to use in this implementation. I reckon that x should be the dataframe of the values I have, mu0 should probably give the locations of the mode (or modes if the data are described by overlapping distributions) of the data, but all the other arguments are beyond me. could you please give me a hint? thank you luigi Here I am placing an example for the normalmixEM function that provides a cut-off for the value highlighted with the arrow.>>>xval <- c(42.39, 44.53, 5.05, 6.9, 45, 2.35, 20.73, 3.31, 45, 4.76, 2.47, 2.37, 2.8, 3.26, 3.21, 45, 6.41, 4.77, 4.72, 4.89, 44.71, 2.8, 4.08, 4.07, 4.81, 2.93, 2.73, 44.75, 2.61, 2.56, 2.75, 2.75, 44.82, 36.59, 2.8, 43.82, 2.53, 2.75, 2.73, 3.05, 2.66, 5.61, 2.28, 4.83, 38.63, 44.23, 2.35, 2.47, 44.03, 6.33, 2.7, 2.96, 42.85, 2.47, 2, 12.76, 2.99, 2, 35.11, 2.63, 44.69, 2.96, 45, 42.13, 41.04, 3.22, 45, 45, 2.55, 4.58, 3.09, 39.98, 2, 2.97, 2.87, 2, 44.82, 45, 2.95, 45, 2, 2.82, 2.47, 2.98, 4.81, 44.53, 44.38, 2.87, 44.45, 2.9, 2.48, 44.14, 3.05, 2.76, 45, 45, 44.54, 42.85, 3.17, 2.46, 39.95, 36.96, 2.59, 2.75, 5.38, 2.8, 44.53, 45, 38.84, 4.64, 3.04, 2.59, 2.64, 45, 2.66, 44.37, 45, 26.32, 3.29, 40.44, 2, 41.51, 2, 45, 2, 5, 2.78, 2.11, 3.31, 2.61, 2.83, 2.6, 2.66, 2.95, 2.46, 2.58, 2.94, 45, 45, 2.71, 2.63, 2.81, 2, 3.29, 5.48, 45, 3.02, 2.82, 3.07, 2.65, 2.61, 2.67, 36.6, 2.08, 40.2, 45, 2.5, 45, 41.46, 45, 2.62, 2.77, 4.14, 2.63, 3.21, 4.79, 42.63, 2.66, 45, 4.69, 3.05, 45, 45, 2.97, 42.07, 2.73, 3.26, 5.17, 2.47, 44.66, 2.42, 5.14, 5.03, 2.65, 2.88, 2.69, 44.1, 3.15, 4.92, 42.02, 6.97, 2.46, 35.98, 2.95, 32.98, 2.79, 44.82, 2.84, 2.15, 44.42, 2.96, 45, 2.42, 2.75, 2.44, 4.58, 2, 45, 41.04, 4.04, 3.08, 2.46, 44.54, 3.21, 39.16, 2, 35.36, 3.08, 5.77, 2.71, 4.41, 2.46, 44.43, 2.62, 45, 2.7, 45, 41.43, 4.65, 3.05, 4.76, 40.66, 32.88, 45, 44.94, 44.67, 3.07, 2.92, 2.75, 2.63, 2.68, 34.15, 3.27, 2.47, 2, 2.63, 45, 3.06, 42.53, 35.25, 2.82, 42.62, 5.83, 4.69, 38.04, 2.47, 38.14, 3.73, 10, 4.93, 4.93, 4.65, 40.8, 2.32, 5.53, 3.01, 41.13, 4.5, 2.65, 44.85, 5.02, 2, 39.99, 2.89, 3.09, 2, 43.77, 44.53, 4.09, 6.22, 3.31, 44.64, 4.65, 45, 6.68, 39.93, 45, 2.77, 2.51, 2, 45, 4.08, 4.61, 6.11, 3.02, 44.8, 45, 44.54, 2.95, 2.77) yval <- c(-0.002, 0.001, 0.002, 0.001, -0.001, 0.003, 0.003, 0.005, 0, 0.011, 0.003, 0.011, 0.004, 0.012, 0.004, 0.005, 0.001, 0.007, 0.006, 0.007, -0.001, 0.011, 0.005, 0.002, 0.007, 0.028, 0.01, 0.002, 0.003, 0.007, 0.033, 0.006, 0.003, 0, 0.01, 0.018, 0.01, 0.008, 0.002, 0.022, 0.02, 0.002, 0.006, 0.008, 0, -0.002, -0.001, 0.001, 0.001, 0.007, 0.005, 0.011, 0.004, 0.001, 0.005, 0.001, 0.019, 0.002, 0.001, 0.002, -0.001, 0.003, 0.001, 0, -0.001, 0.002, 0.005, 0.001, 0, 0.007, 0.011, -0.001, 0.002, 0.01, 0.004, 0.003, 0.001, 0, 0.015, 0.004, 0.001, 0.003, 0.003, 0.027, 0.005, 0, 0.003, 0.003, 0, 0.017, 0.004, -0.001, 0.043, 0.003, -0.001, 0.001, 0, 0, 0.019, 0.003, -0.001, 0.001, 0.009, 0.013, 0.001, 0.021, 0.001, -0.001, -0.001, 0.002, 0.008, 0.004, 0.007, 0.001, 0.007, 0.001, 0, 0.001, 0.004, -0.001, 0.001, 0, 0.007, 0.003, 0.002, 0.001, 0.028, 0.002, 0.005, 0.013, 0.017, 0.013, 0.009, 0.021, 0.01, 0.007, 0.015, 0, 0.002, 0.002, 0.013, 0.012, 0, 0.034, 0.005, 0, 0.041, 0.02, 0.036, 0.004, 0.002, 0.004, 0.001, 0.001, 0.001, 0.003, 0.016, 0.002, 0, 0, 0.002, 0.009, 0.006, 0.022, 0.002, 0.01, 0, 0.012, 0.006, 0.01, 0.005, 0.001, 0.001, 0.027, 0.003, 0.002, 0.02, 0.014, 0.003, 0.003, -0.001, 0.002, 0.008, 0.011, 0.014, 0.017, 0, 0.015, 0.008, 0.001, 0.005, 0.002, 0.001, 0.012, 0.002, 0.004, 0, 0.012, 0, 0.001, 0.005, 0.001, 0.008, 0.011, 0.006, 0.007, 0.005, 0, 0, 0.003, 0.004, 0.002, 0, 0.015, -0.001, 0.002, -0.001, 0.006, 0.005, 0.006, 0.003, 0.002, 0.001, 0.001, 0, 0.007, 0.002, -0.001, 0.033, 0.01, 0.007, 0.003, 0, 0.001, -0.001, 0.011, 0.006, 0.015, 0.013, 0.01, 0.015, 0, 0.014, 0.001, 0.001, 0.003, 0, 0.012, -0.001, 0.002, 0.027, -0.001, 0.009, 0.021, 0.001, 0.001, -0.001, 0.008, 0.001, 0.017, 0.002, 0.027, 0.003, -0.001, 0.018, 0.026, -0.001, 0.004, 0.003, 0.001, 0.019, 0.001, 0.001, 0.019, 0.005, 0.001, -0.001, 0.002, 0.001, 0.001, 0.004, 0.002, 0.007, 0.003, 0.01, 0.002, 0.003, 0.002, 0.005, 0.003, 0, 0.004, 0.032, 0.005, 0.018, 0.002, 0.001, 0.002, 0.003, 0.005) df <- data.frame(xval,yval) DF <- subset(df, xval>=10) sdmult = 10 # A guess to get started library(mixtools) model = normalmixEM((x = DF$yval)) muneg = model$mu[1] # Mean of negative population sdneg = model$sigma[1] # SD of negative population mupos = model$mu[2] # Mean of positive population sdpos = model$sigma[2] # SD of positive population co = muneg + sdmult*sdneg # Calculate threshold based on SD multiplier plot(DF$yval ~ DF$xval) abline(h=co, lty=2) arrows(37,0.018, 42, 0.018, col="red")