All, I would like to apply a set of rules to each row of the sample data set below. The rule sets are the guidelines for determining an individual's date for retirement eligibility. The rules are found in this document, http://www.opm.gov/feddata/RetirementPaperFinal_v4.pdf. I am only interested in the top two categories for retirement eligibility, the CSRS and FERS plans. The data set has four variables Date of Birth (DOB), service computation date (srvCompDT), retirement plan (retirePlan), and the age at which the employee entered federal service (ageFedStart). The service computation date is used to compute the date eligible for retirement. The retirement plan indicates what system the employee is enrolled under. The data does contain a few other retirement plans, for now I want to just ignore those plans. I have labeled plans as 1-CSRS and 2-FERS, and 3-Other. My first attempt at applying the rules was through a complex nesting of ifelse statements, this was not very successful and quite difficult to follow. I then wrote a function and tried using "apply" unsuccessfully. The function is shown below. I would like to put a short script or function together that would allow for an efficient application of the rules to each of the employees. I am trying to avoid a loop, because my data set is quite large, and I may need to update my data set regularly and re-run the analysis and reports that will come from this work. Any advice or guidance on building the function or code to apply the rules would be quite helpful. retireHelp <- structure(list(DOB = structure(c(-6642, -5134, -3444, -5598, -4356, 5737, -4894, -1951, -2950, 2467, 6945, 4908, -7930, -7236, -7727, -77, 4158, -7892, -6028, -7132, -5959, 2309, -2494, -3513, -383, -216, -3369, -5861, 3674, -10265, -8986, -5023, -4862, 1526, -1022, 2175, -11790, -278, -7275, -5084, -1842, 430, -2220, -7444, 440, 4285, -7812, 3335, -7271, -6825, -1098, -1670, -10219, -7131, 5963, 704, -7662, 4219, -2813, 5147, -7334, -8223, -5922, -7497, -9276, -1291, -11640, -5631, 518, -7268, -2105, -5901, -690, -8146, -7059, 133, 1176, -6091, -2895, -6020, -4724, -3616, -5059, -8253, -2604, -12400, -4776, -3671, -9326, -7000, -5574, -3248, 4255, -1358, -6255, 8, -7115, -1701, -5227, 9, -517, -8674, -2554, -4069, -2077, -9872, -6534, 2970, -8307, -3020, -1343, -8897, -2304, -7424, 2078, -8274, -5559, -8888, -9262, -8473, -4088, -2429, -8006, -1091, 5015, 2765, 4036, 3101, -3743, 5103, -10018, -12095, -7646, -5966, -6208, -5784, -1325, -4288, -1665, -1409, 4685, -7881, -3413, 2738, -2201, 1217, -5113, 206, -1292, -1725, 10, -2978, -1895, -830, -105, -2395, -3496, -8244, -9956, -6494, -4678, -4077, 575, 2013, -3411, 3824, -4356, 4523, -5836, -6350, -5337, -41, -2001, -6632, -970, -6790, -2828, -4061, 476, 5854, -9648, -4227, 850, 2619, -7747, -2672, 4069, -12618, -6898, -4178, -1772, -1643, -2064, -157, 4551, -8688, -6087, -2040, -7239, -783), format = "m/d/y", origin = structure(c(1, 1, 1970 ), .Names = c("month", "day", "year")), class = c("dates", "times" )), srvCompDT = structure(c(743, 12429, 3585, 4364, 13227, 13578, 13591, 8585, 9587, 13913, 14753, 13247, 2246, 1439, 8845, 7018, 12625, -552, 5688, 7080, 13255, 13549, 12709, 13969, 13997, 9532, 13689, 1226, 13549, 4093, 13423, 13801, 3181, 14809, 13353, 9457, 7745, 8986, 4759, 4486, 6449, 11172, 8669, 3344, 13745, 12275, 5081, 13605, 8006, 3048, 6330, 13521, 5254, 1733, 14095, 8516, 4848, 13521, 5970, 14697, 8291, 139, 11435, 3567, 8961, 5775, 3602, 1409, 11577, 12163, 12258, 13156, 9472, 7963, 1362, 10332, 9557, 3997, 7509, 4691, 3133, 5877, 6782, 11449, 13283, 8040, 11565, 3425, 7860, 1790, 10778, 13199, 12625, 5889, 3317, 9831, 1068, 8040, 7123, 9104, 12836, 7928, 12764, 8922, 5324, -1004, 1806, 10263, 5635, 10310, 5625, 8861, 14613, 3896, 10316, 5725, 12751, 6113, 2997, 112, 5707, 4987, -1018, 8055, 13885, 13073, 14585, 14865, 14935, 14390, 9735, 7654, 4557, 661, 1638, 1112, 14011, 3086, 7032, 13942, 13325, 6735, 13900, 12673, 10148, 14193, 14767, 8447, 6114, 10688, 13544, 7106, 8587, 14753, 7886, 12280, 11946, 13662, 3332, 2108, 13977, 6203, 8369, 13857, 8369, 11486, 8306, 12466, 12639, 7270, 4325, 13843, 14026, 14039, 6147, 7676, 5781, 7038, 9187, 14640, 6174, 11491, 13913, 13787, 13465, 8854, 13152, 1826, 1412, 4317, 5794, 5548, 8951, 12947, 12639, 5345, 5961, 4637, 6465, 13717), format = "m/d/y", origin = structure(c(1, 1, 1970), .Names = c("month", "day", "year")), class = c("dates", "times")), retirePlan = c(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 3, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), ageFedStart = c(20.22, 48.08, 19.24, 27.27, 48.14, 21.47, 50.61, 28.85, 34.32, 31.34, 21.38, 22.83, 27.86, 23.75, 45.37, 19.43, 23.18, 20.1, 32.08, 38.91, 52.61, 30.77, 41.62, 47.86, 39.37, 26.69, 46.7, 19.4, 27.04, 39.31, 61.35, 51.54, 22.02, 36.37, 39.36, 19.94, 53.48, 25.36, 32.95, 26.2, 22.7, 29.41, 29.81, 29.54, 36.43, 21.88, 35.3, 28.12, 41.83, 27.03, 20.34, 41.59, 42.36, 24.27, 22.26, 21.39, 34.25, 25.47, 24.05, 26.15, 42.78, 22.89, 47.52, 30.29, 49.93, 19.35, 41.73, 19.27, 30.28, 53.2, 39.32, 52.18, 27.82, 44.1, 23.06, 27.92, 22.95, 27.62, 28.48, 29.33, 21.51, 25.99, 32.42, 53.94, 43.5, 55.96, 44.74, 19.43, 47.05, 24.07, 44.77, 45.03, 22.92, 19.84, 26.21, 26.89, 22.4, 26.67, 33.81, 24.9, 36.56, 45.45, 41.94, 35.57, 20.26, 24.28, 22.83, 19.97, 38.17, 36.5, 19.08, 48.62, 46.32, 30.99, 22.55, 38.33, 50.13, 41.07, 33.56, 23.5, 26.82, 20.3, 19.13, 25.04, 24.28, 28.22, 28.88, 32.21, 51.14, 25.43, 54.08, 54.07, 33.41, 18.14, 21.48, 18.88, 41.99, 20.19, 23.81, 42.03, 23.66, 40.02, 47.4, 27.2, 33.81, 35.53, 54.43, 22.56, 20.28, 33.98, 37.05, 27.61, 28.7, 42.66, 21.88, 40.18, 42.28, 59.98, 36.38, 23.55, 51.07, 28.15, 21.34, 32.43, 32.25, 20.98, 34.67, 21.75, 50.58, 37.29, 26.45, 38.01, 43.88, 56.59, 19.49, 39.61, 23.57, 30.39, 23.85, 24.05, 43.32, 43.03, 35.76, 30.58, 58.08, 31.56, 24.87, 39.55, 22.75, 23.26, 20.71, 19.69, 30.16, 35.88, 22.14, 38.42, 32.99, 18.28, 37.52, 39.7)), .Names = c("DOB", "srvCompDT", "retirePlan", "ageFedStart"), row.names = c(NA, 200L), class "data.frame") rrDT <- function(retSys, ageFedStart, birthDT, serviceCompDT){ if(retSys == "CSRS") { if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 45) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 45 & ageFedStart < 60) rtDT <- dates(birthDT+(365.25*65)) else if (ageFedStart >= 60) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (retSys == "FERS") { if (birthDT < "01/01/53") { if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (birthDT >= "01/01/53" & birthDT < "01/01/70") { if(ageFedStart < 26) rtDT <- dates(birthDT+(365.25*56)) else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (birthDT >= "01/01/70"){ if(ageFedStart < 27) rtDT <- dates(birthDT+(365.25*56)) else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } } else rtDT <- NA return(rtDT) } Adrian R. Katschke Data Analytics Specialist Human Capital Program Office Human Resources PH: 317-212-7813 DSN: 699-7813
If I understand you correctly, you want ?ifelse, which works on the full logical vectors of rules applied to the variables, not if....else, which works on only a single logical. -- Bert Gunter On Wed, Jan 26, 2011 at 12:18 PM, KATSCHKE, ADRIAN CIV DFAS <ADRIAN.KATSCHKE at dfas.mil> wrote:> All, > > I would like to apply a set of rules to each row of the sample data set > below. The rule sets are the guidelines for determining an individual's > date for retirement eligibility. The rules are found in this document, > http://www.opm.gov/feddata/RetirementPaperFinal_v4.pdf. I am only > interested in the top two categories for retirement eligibility, the > CSRS and FERS plans. > > The data set has four variables Date of Birth (DOB), service computation > date (srvCompDT), retirement plan (retirePlan), and the age at which the > employee entered federal service (ageFedStart). The service computation > date is used to compute the date eligible for retirement. The retirement > plan indicates what system the employee is enrolled under. > > The data does contain a few other retirement plans, for now I want to > just ignore those plans. I have labeled plans as 1-CSRS and 2-FERS, and > 3-Other. My first attempt at applying the rules was through a complex > nesting of ifelse statements, this was not very successful and quite > difficult to follow. I then wrote a function and tried using "apply" > unsuccessfully. The function is shown below. > > I would like to put a short script or function together that would allow > for an efficient application of the rules to each of the employees. I am > trying to avoid a loop, because my data set is quite large, and I may > need to update my data set regularly and re-run the analysis and reports > that will come from this work. > > Any advice or guidance on building the function or code to apply the > rules would be quite helpful. > > retireHelp <- > structure(list(DOB = structure(c(-6642, -5134, -3444, -5598, > -4356, 5737, -4894, -1951, -2950, 2467, 6945, 4908, -7930, -7236, > -7727, -77, 4158, -7892, -6028, -7132, -5959, 2309, -2494, -3513, > -383, -216, -3369, -5861, 3674, -10265, -8986, -5023, -4862, > 1526, -1022, 2175, -11790, -278, -7275, -5084, -1842, 430, -2220, > -7444, 440, 4285, -7812, 3335, -7271, -6825, -1098, -1670, -10219, > -7131, 5963, 704, -7662, 4219, -2813, 5147, -7334, -8223, -5922, > -7497, -9276, -1291, -11640, -5631, 518, -7268, -2105, -5901, > -690, -8146, -7059, 133, 1176, -6091, -2895, -6020, -4724, -3616, > -5059, -8253, -2604, -12400, -4776, -3671, -9326, -7000, -5574, > -3248, 4255, -1358, -6255, 8, -7115, -1701, -5227, 9, -517, -8674, > -2554, -4069, -2077, -9872, -6534, 2970, -8307, -3020, -1343, > -8897, -2304, -7424, 2078, -8274, -5559, -8888, -9262, -8473, > -4088, -2429, -8006, -1091, 5015, 2765, 4036, 3101, -3743, 5103, > -10018, -12095, -7646, -5966, -6208, -5784, -1325, -4288, -1665, > -1409, 4685, -7881, -3413, 2738, -2201, 1217, -5113, 206, -1292, > -1725, 10, -2978, -1895, -830, -105, -2395, -3496, -8244, -9956, > -6494, -4678, -4077, 575, 2013, -3411, 3824, -4356, 4523, -5836, > -6350, -5337, -41, -2001, -6632, -970, -6790, -2828, -4061, 476, > 5854, -9648, -4227, 850, 2619, -7747, -2672, 4069, -12618, -6898, > -4178, -1772, -1643, -2064, -157, 4551, -8688, -6087, -2040, > -7239, -783), format = "m/d/y", origin = structure(c(1, 1, 1970 > ), .Names = c("month", "day", "year")), class = c("dates", "times" > )), srvCompDT = structure(c(743, 12429, 3585, 4364, 13227, 13578, > 13591, 8585, 9587, 13913, 14753, 13247, 2246, 1439, 8845, 7018, > 12625, -552, 5688, 7080, 13255, 13549, 12709, 13969, 13997, 9532, > 13689, 1226, 13549, 4093, 13423, 13801, 3181, 14809, 13353, 9457, > 7745, 8986, 4759, 4486, 6449, 11172, 8669, 3344, 13745, 12275, > 5081, 13605, 8006, 3048, 6330, 13521, 5254, 1733, 14095, 8516, > 4848, 13521, 5970, 14697, 8291, 139, 11435, 3567, 8961, 5775, > 3602, 1409, 11577, 12163, 12258, 13156, 9472, 7963, 1362, 10332, > 9557, 3997, 7509, 4691, 3133, 5877, 6782, 11449, 13283, 8040, > 11565, 3425, 7860, 1790, 10778, 13199, 12625, 5889, 3317, 9831, > 1068, 8040, 7123, 9104, 12836, 7928, 12764, 8922, 5324, -1004, > 1806, 10263, 5635, 10310, 5625, 8861, 14613, 3896, 10316, 5725, > 12751, 6113, 2997, 112, 5707, 4987, -1018, 8055, 13885, 13073, > 14585, 14865, 14935, 14390, 9735, 7654, 4557, 661, 1638, 1112, > 14011, 3086, 7032, 13942, 13325, 6735, 13900, 12673, 10148, 14193, > 14767, 8447, 6114, 10688, 13544, 7106, 8587, 14753, 7886, 12280, > 11946, 13662, 3332, 2108, 13977, 6203, 8369, 13857, 8369, 11486, > 8306, 12466, 12639, 7270, 4325, 13843, 14026, 14039, 6147, 7676, > 5781, 7038, 9187, 14640, 6174, 11491, 13913, 13787, 13465, 8854, > 13152, 1826, 1412, 4317, 5794, 5548, 8951, 12947, 12639, 5345, > 5961, 4637, 6465, 13717), format = "m/d/y", origin = structure(c(1, > 1, 1970), .Names = c("month", "day", "year")), class = c("dates", > "times")), retirePlan = c(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, > 1, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, > 2, 2, 2, 2, 3, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, > 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, > 3, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, > 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, > 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, > 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, > 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, > 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), > ? ?ageFedStart = c(20.22, 48.08, 19.24, 27.27, 48.14, 21.47, > ? ?50.61, 28.85, 34.32, 31.34, 21.38, 22.83, 27.86, 23.75, 45.37, > ? ?19.43, 23.18, 20.1, 32.08, 38.91, 52.61, 30.77, 41.62, 47.86, > ? ?39.37, 26.69, 46.7, 19.4, 27.04, 39.31, 61.35, 51.54, 22.02, > ? ?36.37, 39.36, 19.94, 53.48, 25.36, 32.95, 26.2, 22.7, 29.41, > ? ?29.81, 29.54, 36.43, 21.88, 35.3, 28.12, 41.83, 27.03, 20.34, > ? ?41.59, 42.36, 24.27, 22.26, 21.39, 34.25, 25.47, 24.05, 26.15, > ? ?42.78, 22.89, 47.52, 30.29, 49.93, 19.35, 41.73, 19.27, 30.28, > ? ?53.2, 39.32, 52.18, 27.82, 44.1, 23.06, 27.92, 22.95, 27.62, > ? ?28.48, 29.33, 21.51, 25.99, 32.42, 53.94, 43.5, 55.96, 44.74, > ? ?19.43, 47.05, 24.07, 44.77, 45.03, 22.92, 19.84, 26.21, 26.89, > ? ?22.4, 26.67, 33.81, 24.9, 36.56, 45.45, 41.94, 35.57, 20.26, > ? ?24.28, 22.83, 19.97, 38.17, 36.5, 19.08, 48.62, 46.32, 30.99, > ? ?22.55, 38.33, 50.13, 41.07, 33.56, 23.5, 26.82, 20.3, 19.13, > ? ?25.04, 24.28, 28.22, 28.88, 32.21, 51.14, 25.43, 54.08, 54.07, > ? ?33.41, 18.14, 21.48, 18.88, 41.99, 20.19, 23.81, 42.03, 23.66, > ? ?40.02, 47.4, 27.2, 33.81, 35.53, 54.43, 22.56, 20.28, 33.98, > ? ?37.05, 27.61, 28.7, 42.66, 21.88, 40.18, 42.28, 59.98, 36.38, > ? ?23.55, 51.07, 28.15, 21.34, 32.43, 32.25, 20.98, 34.67, 21.75, > ? ?50.58, 37.29, 26.45, 38.01, 43.88, 56.59, 19.49, 39.61, 23.57, > ? ?30.39, 23.85, 24.05, 43.32, 43.03, 35.76, 30.58, 58.08, 31.56, > ? ?24.87, 39.55, 22.75, 23.26, 20.71, 19.69, 30.16, 35.88, 22.14, > ? ?38.42, 32.99, 18.28, 37.52, 39.7)), .Names = c("DOB", "srvCompDT", > "retirePlan", "ageFedStart"), row.names = c(NA, 200L), class > "data.frame") > > rrDT <- function(retSys, ageFedStart, birthDT, serviceCompDT){ > ? ?if(retSys == "CSRS") { > ? ? ? ?if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) > ? ? ? ?else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- > dates(serviceCompDT+(365.25*30)) > ? ? ? ?else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- > dates(birthDT+(365.25*60)) > ? ? ? ?else if (ageFedStart >= 40 & ageFedStart < 45) rtDT <- > dates(serviceCompDT+(365.25*20)) > ? ? ? ?else if (ageFedStart >= 45 & ageFedStart < 60) rtDT <- > dates(birthDT+(365.25*65)) > ? ? ? ?else if (ageFedStart >= 60) rtDT <- > dates(serviceCompDT+(365.25*5)) > ? ? ? ?else rtDT <- NA > ? ?} > ? ?else if (retSys == "FERS") { > ? ? ? ?if (birthDT < "01/01/53") { > ? ? ? ? ? ?if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) > ? ? ? ? ? ?else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- > dates(serviceCompDT+(365.25*30)) > ? ? ? ? ? ?else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- > dates(birthDT+(365.25*60)) > ? ? ? ? ? ?else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- > dates(serviceCompDT+(365.25*20)) > ? ? ? ? ? ?else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- > dates(birthDT+(365.25*62)) > ? ? ? ? ? ?else if (ageFedStart >= 57) rtDT <- > dates(serviceCompDT+(365.25*5)) > ? ? ? ? ? ?else rtDT <- NA > ? ? ? ?} > ? ? ? ?else if (birthDT >= "01/01/53" & birthDT < "01/01/70") { > ? ? ? ? ? ?if(ageFedStart < 26) rtDT <- dates(birthDT+(365.25*56)) > ? ? ? ? ? ?else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- > dates(serviceCompDT+(365.25*30)) > ? ? ? ? ? ?else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- > dates(birthDT+(365.25*60)) > ? ? ? ? ? ?else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- > dates(serviceCompDT+(365.25*20)) > ? ? ? ? ? ?else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- > dates(birthDT+(365.25*62)) > ? ? ? ? ? ?else if (ageFedStart >= 57) rtDT <- > dates(serviceCompDT+(365.25*5)) > ? ? ? ? ? ?else rtDT <- NA > ? ? ? ?} > ? ? ? ?else if (birthDT >= "01/01/70"){ > ? ? ? ? ? ?if(ageFedStart < 27) rtDT <- dates(birthDT+(365.25*56)) > ? ? ? ? ? ?else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- > dates(serviceCompDT+(365.25*30)) > ? ? ? ? ? ?else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- > dates(birthDT+(365.25*60)) > ? ? ? ? ? ?else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- > dates(serviceCompDT+(365.25*20)) > ? ? ? ? ? ?else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- > dates(birthDT+(365.25*62)) > ? ? ? ? ? ?else if (ageFedStart >= 57) rtDT <- > dates(serviceCompDT+(365.25*5)) > ? ? ? ? ? ?else rtDT <- NA > ? ? ? ?} > ? ?} > ? ?else rtDT <- NA > ? ?return(rtDT) > } > > Adrian R. Katschke > Data Analytics Specialist > Human Capital Program Office > Human Resources > PH: 317-212-7813 > DSN: 699-7813 > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >-- Bert Gunter Genentech Nonclinical Biostatistics
Dennis, This is excellent. Thank you for the help. I knew I had a tangle mess, but I didn't realize how much of a tangle until I used this code. The coding definitely simplified the process and sped up the execution time. Adrian -----Original Message----- From: Dennis Murphy [mailto:djmuser at gmail.com] Sent: Wednesday, January 26, 2011 4:57 PM To: KATSCHKE, ADRIAN CIV DFAS Subject: Re: [R] applying a set of rules to each row Hi: I don't see the need for this labyrinth of if statements. Here's a way that I think solves the CSRS block with only one ifelse statement: agefacC <- with(retireHelp, cut(ageFedStart, breaks = c(0, 25, 30, 40, 45, 60, 100), right = FALSE)) birthlevels <- c('[0,25)', '[30,40)', '[45,60)') baseDate <- ifelse(agefacC %in% birthlevels, birthDT, serviceCompDT) multiplier <- as.numeric( (agefacC == '[0,25)') * 55 + (agefacC == '[25,30)') * 30 + (agefacC == '[30,40)') * 60 + (agefacC == '[40,45)') * 20 + (agefacC == '[45,60)') * 65 + (agefacC == '[60,100)') * 5 ) rtDT <- baseDate + 365.25 * multiplier I have no way of testing this on your data, but the idea is to use a vectorized approach to the problem rather than a series of conditional statements, which, as a CS type informed me recently, is the most time-consuming operation in computing. Double-check that this is an accurate restatement of your code. Explanation of intent: agefacC creates a factor from a continuous variable using the function cut(), with lower limit 0, upper limit 100 and intermediate breaks as given above. The argument right = FALSE closes the interval on the left instead of on the right.> levels(agefacC)[1] "[0,25)" "[25,30)" "[30,40)" "[40,45)" "[45,60)" "[60,100)" The birthlevels vector defines the age groups that use birthDT as the base date; the others use serviceCompDT. The ifelse statement (vectorized) uses birthDT as the base date (the constant term in rtDT) if the level of agefacC for each interval belongs to the levels in the vector birthlevels. If not, the base date is given by serviceCompDT. The multiplier variable is the inner product of a logical statement corresponding to each level of agefacC times the multiplier of 365.25. Once these vectors are in place, rtDT is straightforward to compute in a vectorized fashion. If this approach flies, you can modify the code for the other cases in a similar fashion. Hopefully, it not only simplifies the code, but also speeds up execution time. HTH, Dennis On Wed, Jan 26, 2011 at 12:18 PM, KATSCHKE, ADRIAN CIV DFAS <ADRIAN.KATSCHKE at dfas.mil> wrote: All, I would like to apply a set of rules to each row of the sample data set below. The rule sets are the guidelines for determining an individual's date for retirement eligibility. The rules are found in this document, http://www.opm.gov/feddata/RetirementPaperFinal_v4.pdf. I am only interested in the top two categories for retirement eligibility, the CSRS and FERS plans. The data set has four variables Date of Birth (DOB), service computation date (srvCompDT), retirement plan (retirePlan), and the age at which the employee entered federal service (ageFedStart). The service computation date is used to compute the date eligible for retirement. The retirement plan indicates what system the employee is enrolled under. The data does contain a few other retirement plans, for now I want to just ignore those plans. I have labeled plans as 1-CSRS and 2-FERS, and 3-Other. My first attempt at applying the rules was through a complex nesting of ifelse statements, this was not very successful and quite difficult to follow. I then wrote a function and tried using "apply" unsuccessfully. The function is shown below. I would like to put a short script or function together that would allow for an efficient application of the rules to each of the employees. I am trying to avoid a loop, because my data set is quite large, and I may need to update my data set regularly and re-run the analysis and reports that will come from this work. Any advice or guidance on building the function or code to apply the rules would be quite helpful. retireHelp <- structure(list(DOB = structure(c(-6642, -5134, -3444, -5598, -4356, 5737, -4894, -1951, -2950, 2467, 6945, 4908, -7930, -7236, -7727, -77, 4158, -7892, -6028, -7132, -5959, 2309, -2494, -3513, -383, -216, -3369, -5861, 3674, -10265, -8986, -5023, -4862, 1526, -1022, 2175, -11790, -278, -7275, -5084, -1842, 430, -2220, -7444, 440, 4285, -7812, 3335, -7271, -6825, -1098, -1670, -10219, -7131, 5963, 704, -7662, 4219, -2813, 5147, -7334, -8223, -5922, -7497, -9276, -1291, -11640, -5631, 518, -7268, -2105, -5901, -690, -8146, -7059, 133, 1176, -6091, -2895, -6020, -4724, -3616, -5059, -8253, -2604, -12400, -4776, -3671, -9326, -7000, -5574, -3248, 4255, -1358, -6255, 8, -7115, -1701, -5227, 9, -517, -8674, -2554, -4069, -2077, -9872, -6534, 2970, -8307, -3020, -1343, -8897, -2304, -7424, 2078, -8274, -5559, -8888, -9262, -8473, -4088, -2429, -8006, -1091, 5015, 2765, 4036, 3101, -3743, 5103, -10018, -12095, -7646, -5966, -6208, -5784, -1325, -4288, -1665, -1409, 4685, -7881, -3413, 2738, -2201, 1217, -5113, 206, -1292, -1725, 10, -2978, -1895, -830, -105, -2395, -3496, -8244, -9956, -6494, -4678, -4077, 575, 2013, -3411, 3824, -4356, 4523, -5836, -6350, -5337, -41, -2001, -6632, -970, -6790, -2828, -4061, 476, 5854, -9648, -4227, 850, 2619, -7747, -2672, 4069, -12618, -6898, -4178, -1772, -1643, -2064, -157, 4551, -8688, -6087, -2040, -7239, -783), format = "m/d/y", origin = structure(c(1, 1, 1970 ), .Names = c("month", "day", "year")), class = c("dates", "times" )), srvCompDT = structure(c(743, 12429, 3585, 4364, 13227, 13578, 13591, 8585, 9587, 13913, 14753, 13247, 2246, 1439, 8845, 7018, 12625, -552, 5688, 7080, 13255, 13549, 12709, 13969, 13997, 9532, 13689, 1226, 13549, 4093, 13423, 13801, 3181, 14809, 13353, 9457, 7745, 8986, 4759, 4486, 6449, 11172, 8669, 3344, 13745, 12275, 5081, 13605, 8006, 3048, 6330, 13521, 5254, 1733, 14095, 8516, 4848, 13521, 5970, 14697, 8291, 139, 11435, 3567, 8961, 5775, 3602, 1409, 11577, 12163, 12258, 13156, 9472, 7963, 1362, 10332, 9557, 3997, 7509, 4691, 3133, 5877, 6782, 11449, 13283, 8040, 11565, 3425, 7860, 1790, 10778, 13199, 12625, 5889, 3317, 9831, 1068, 8040, 7123, 9104, 12836, 7928, 12764, 8922, 5324, -1004, 1806, 10263, 5635, 10310, 5625, 8861, 14613, 3896, 10316, 5725, 12751, 6113, 2997, 112, 5707, 4987, -1018, 8055, 13885, 13073, 14585, 14865, 14935, 14390, 9735, 7654, 4557, 661, 1638, 1112, 14011, 3086, 7032, 13942, 13325, 6735, 13900, 12673, 10148, 14193, 14767, 8447, 6114, 10688, 13544, 7106, 8587, 14753, 7886, 12280, 11946, 13662, 3332, 2108, 13977, 6203, 8369, 13857, 8369, 11486, 8306, 12466, 12639, 7270, 4325, 13843, 14026, 14039, 6147, 7676, 5781, 7038, 9187, 14640, 6174, 11491, 13913, 13787, 13465, 8854, 13152, 1826, 1412, 4317, 5794, 5548, 8951, 12947, 12639, 5345, 5961, 4637, 6465, 13717), format = "m/d/y", origin = structure(c(1, 1, 1970), .Names = c("month", "day", "year")), class = c("dates", "times")), retirePlan = c(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 3, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 3, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), ageFedStart = c(20.22, 48.08, 19.24, 27.27, 48.14, 21.47, 50.61, 28.85, 34.32, 31.34, 21.38, 22.83, 27.86, 23.75, 45.37, 19.43, 23.18, 20.1, 32.08, 38.91, 52.61, 30.77, 41.62, 47.86, 39.37, 26.69, 46.7, 19.4, 27.04, 39.31, 61.35, 51.54, 22.02, 36.37, 39.36, 19.94, 53.48, 25.36, 32.95, 26.2, 22.7, 29.41, 29.81, 29.54, 36.43, 21.88, 35.3, 28.12, 41.83, 27.03, 20.34, 41.59, 42.36, 24.27, 22.26, 21.39, 34.25, 25.47, 24.05, 26.15, 42.78, 22.89, 47.52, 30.29, 49.93, 19.35, 41.73, 19.27, 30.28, 53.2, 39.32, 52.18, 27.82, 44.1, 23.06, 27.92, 22.95, 27.62, 28.48, 29.33, 21.51, 25.99, 32.42, 53.94, 43.5, 55.96, 44.74, 19.43, 47.05, 24.07, 44.77, 45.03, 22.92, 19.84, 26.21, 26.89, 22.4, 26.67, 33.81, 24.9, 36.56, 45.45, 41.94, 35.57, 20.26, 24.28, 22.83, 19.97, 38.17, 36.5, 19.08, 48.62, 46.32, 30.99, 22.55, 38.33, 50.13, 41.07, 33.56, 23.5, 26.82, 20.3, 19.13, 25.04, 24.28, 28.22, 28.88, 32.21, 51.14, 25.43, 54.08, 54.07, 33.41, 18.14, 21.48, 18.88, 41.99, 20.19, 23.81, 42.03, 23.66, 40.02, 47.4, 27.2, 33.81, 35.53, 54.43, 22.56, 20.28, 33.98, 37.05, 27.61, 28.7, 42.66, 21.88, 40.18, 42.28, 59.98, 36.38, 23.55, 51.07, 28.15, 21.34, 32.43, 32.25, 20.98, 34.67, 21.75, 50.58, 37.29, 26.45, 38.01, 43.88, 56.59, 19.49, 39.61, 23.57, 30.39, 23.85, 24.05, 43.32, 43.03, 35.76, 30.58, 58.08, 31.56, 24.87, 39.55, 22.75, 23.26, 20.71, 19.69, 30.16, 35.88, 22.14, 38.42, 32.99, 18.28, 37.52, 39.7)), .Names = c("DOB", "srvCompDT", "retirePlan", "ageFedStart"), row.names = c(NA, 200L), class "data.frame") rrDT <- function(retSys, ageFedStart, birthDT, serviceCompDT){ if(retSys == "CSRS") { if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 45) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 45 & ageFedStart < 60) rtDT <- dates(birthDT+(365.25*65)) else if (ageFedStart >= 60) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (retSys == "FERS") { if (birthDT < "01/01/53") { if(ageFedStart < 25) rtDT <- dates(birthDT+(365.25*55)) else if (ageFedStart >= 25 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (birthDT >= "01/01/53" & birthDT < "01/01/70") { if(ageFedStart < 26) rtDT <- dates(birthDT+(365.25*56)) else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } else if (birthDT >= "01/01/70"){ if(ageFedStart < 27) rtDT <- dates(birthDT+(365.25*56)) else if (ageFedStart >= 27 & ageFedStart < 30) rtDT <- dates(serviceCompDT+(365.25*30)) else if (ageFedStart >= 30 & ageFedStart < 40) rtDT <- dates(birthDT+(365.25*60)) else if (ageFedStart >= 40 & ageFedStart < 42) rtDT <- dates(serviceCompDT+(365.25*20)) else if (ageFedStart >= 42 & ageFedStart < 57) rtDT <- dates(birthDT+(365.25*62)) else if (ageFedStart >= 57) rtDT <- dates(serviceCompDT+(365.25*5)) else rtDT <- NA } } else rtDT <- NA return(rtDT) } Adrian R. Katschke Data Analytics Specialist Human Capital Program Office Human Resources PH: 317-212-7813 DSN: 699-7813 ______________________________________________ R-help at r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.