Michael.Laviolette at dhhs.state.nh.us
2013-Oct-10 17:16 UTC
[R] Using calibrate for raking (survey package)
# Question (M. Laviolette):
# I'm studying the calibration function in the survey package in preparation
# for raking some survey data. Results from the rake function below agree with
# other sources. When I run calibrate, I get a warning message and the M and F
# weights seem to be reversed. Even allowing for that, the deviation between
# calibrated and raked weights is much more than I expected. I see that in the
# calibrate function "population" is supposed to be a vector or table, but
# can't figure out how to adjust. Can you clarify? Thanks. -M. Laviolette

library(survey)
library(reshape2)

# Sample data ----
# 200 respondents: satisfaction score, gender code (1/2) and age-group code
# (1/2/3).
satisfy <- c(
  2, 5, 2, 3, 4, 3, 3, 3, 4, 2, 2, 3, 2, 3, 4, 3, 3, 2, 3, 3, 4, 3, 3, 3, 2,
  3, 3, 3, 2, 1, 4, 4, 3, 3, 2, 3, 4, 2, 3, 3, 3, 5, 3, 1, 4, 3, 3, 4, 4, 2,
  3, 3, 3, 5, 4, 4, 5, 3, 4, 4, 5, 3, 3, 4, 3, 3, 3, 3, 2, 4, 4, 3, 3, 4, 3,
  2, 4, 4, 3, 4, 4, 4, 5, 3, 3, 4, 4, 4, 3, 2, 2, 4, 3, 4, 3, 4, 4, 3, 3, 3,
  3, 4, 4, 4, 4, 3, 3, 3, 3, 2, 3, 3, 2, 2, 5, 4, 5, 2, 4, 4, 4, 3, 4, 4, 2,
  4, 4, 3, 4, 3, 4, 2, 3, 3, 2, 4, 3, 4, 4, 3, 5, 2, 4, 4, 3, 4, 5, 3, 3, 3,
  3, 2, 3, 4, 4, 4, 2, 4, 4, 2, 3, 5, 2, 2, 3, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
  4, 4, 4, 4, 4, 4, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 3, 4, 3, 4, 4, 4, 3, 3, 2
)
Gender <- c(
  1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1,
  2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,
  1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2,
  1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2,
  1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1,
  2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1,
  2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2,
  1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1
)
Age <- c(
  2, 3, 2, 1, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2,
  3, 3, 3, 1, 2, 2, 3, 2, 2, 2, 1, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1,
  3, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 1, 2, 2, 2, 1, 2, 2, 3, 2, 2, 1,
  2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 1, 3, 2, 2, 2, 3, 2, 2, 2,
  3, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 1, 2, 2, 2, 2, 2, 2, 2,
  2, 3, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2, 2, 2, 1, 2, 1, 1, 2, 1,
  2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 1, 1, 2, 3, 3, 1, 3, 3, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 2, 2, 2, 3, 1, 2, 1, 2, 3, 2, 2, 2, 3, 2
)

# One definition of the category labels, shared by the sample and by the
# population margins, so labels and level ORDER cannot drift apart.
# (The posted code labelled the oldest sample group "45 +" but the population
# group "45+".)
gender.levels <- c("M", "F")
age.levels <- c("<30", "30-44", "45+")

emp.dat <- data.frame(
  Gender = factor(Gender, labels = gender.levels),
  Age = factor(Age, labels = age.levels),
  satisfy
)

# Population margins ----
# The margin variables are factors with the same level order as the sample.
# A plain character column would get alphabetical levels ("F" before "M") when
# the margins are expanded into a vector of totals, so the totals would no
# longer line up with the sample's design-matrix columns.
pop.gender <- data.frame(
  Gender = factor(gender.levels, levels = gender.levels),
  Freq = c(3800, 6200)
)
pop.age <- data.frame(
  Age = factor(age.levels, levels = age.levels),
  Freq = c(2000, 5000, 3000)
)

# Design: no clusters, no strata, every respondent starts with weight 50.
emp.svy <- svydesign(
  ids = ~0,
  strata = NULL,
  weights = ~rep(50, 200),
  data = emp.dat
)

# Raking with rake() ----
rake.svy <- rake(
  emp.svy,
  sample.margins = list(~Gender, ~Age),
  population.margins = list(pop.gender, pop.age)
)

# Raking with calibrate() ----
# As originally posted, this call produced
#   Warning message:
#   In regcalibrate.survey.design2(design, formula, population,
#   aggregate.stage = aggregate.stage, : Sample and population totals have
#   different names.
# and the M and F weights seemed reversed.
# NOTE(review): two likely causes, both addressed here -- confirm by re-running:
#   1. the argument is spelled `calfun`, not `cal.fun`; the misspelled name is
#      not matched, so the default linear calibration was used instead of
#      raking (the warning comes from regcalibrate);
#   2. the population margins did not share the sample's factor levels (see
#      the comment on the population margins).
cal.svy <- calibrate(
  emp.svy,
  formula = list(~Gender, ~Age),
  population = list(pop.gender, pop.age),
  calfun = "raking"
)

# Check weights ----
# Weighted Gender x Age tables (with margins) for both approaches; the two
# tables should now agree on the margins.
check1 <- with(rake.svy, cbind(variables, weight = 1 / prob))
dcast(check1, Gender ~ Age, fun.aggregate = sum, value.var = "weight",
      margins = TRUE)

check2 <- with(cal.svy, cbind(variables, weight = 1 / prob))
dcast(check2, Gender ~ Age, fun.aggregate = sum, value.var = "weight",
      margins = TRUE)