Two possibilities are:
# Approach using ave(): keep every row whose es_time equals the maximum
# es_time within its cvd_basestudy group.
# Levels are given as unique(DF$cvd_basestudy), i.e. in order of first
# appearance in DF.
f <- factor(DF$cvd_basestudy, unique(DF$cvd_basestudy))
# ave() applies FUN within each level of f and returns one value per row of
# DF; here that value flags whether the row attains its group's maximum.
ind <- ave(DF$es_time, f, FUN = function (x) x == max(x))
# as.logical() turns the per-row flags into a logical row index. Rows tied
# for the group maximum are all retained.
DF[as.logical(ind), ]
# Approach using split()/lapply(): split DF by cvd_basestudy and pick one
# row from each piece.
# Levels are given as unique(DF$cvd_basestudy), i.e. in order of first
# appearance in DF.
f <- factor(DF$cvd_basestudy, unique(DF$cvd_basestudy))
# which.max() returns the position of the first maximum only, so at most one
# row is kept per group even when several rows tie for the maximum.
lis <- lapply(split(DF, f), function (d) d[which.max(d$es_time), ])
# Stack the per-group rows back into a single data frame.
do.call(rbind, lis)
Here DF is your data frame, i.e., first run DF <- datares_sinus_variable.
I hope it helps.
Best,
Dimitris
On 4/28/2011 1:31 PM, Sandy Small wrote:
> Hi
>
> I'm trying to extract complete rows from a dataframe by group based on
> the maximum in a column within that group.
> Thus I have a dataframe:
>
> cvd_basestudy ... es_time ...
> _____________
> study1 ... 0.3091667
> study2 ... 0.3091667
> study2 ... 0.2625000
> study3 ... 0.3033333
> study3 ... 0.2625000
> __________
> etc
>
> I can extract the basestudy and the max(es_time) using ddply
> ddply(datares_sinus_variable, .(cvd_basestudy),
> function(x){max(x[['es_time']])})
> or by
> by(datares_sinus_variable$es_time, datares_sinus_variable$cvd_basestudy,
> max)
>
> but how do I extract the whole line so that I can get a dataframe with
> all the data for the maximum line?
>
> (dput output from first 5 rows of my actual dataframe follows)
>
> Any help would be much appreciated. Thanks in advance
> Sandy Small
>
> structure(list(cvd_basestudy = c("study1", "study2", "study2",
> "study3", "study3"), ecd_rhythm = structure(c(5L, 5L, 5L, 5L,
> 5L), .Label = c("AF", "FLUTTER", "PACED AF", "SCRAP", "SINUS",
> "UNSURE"), class = "factor"), cvd_frame_mode = structure(c(2L,
> 2L, 2L, 2L, 2L), .Label = c("fixed_time", "variable_time"), class = "factor"),
> cvd_part_fmt = structure(c(4L, 4L, 4L, 4L, 4L), .Label = c("first",
> "last", "mid", "whole"), class = "factor"), cvd_prev_fmt = structure(c(1L,
> 2L, 1L, 3L, 2L), .Label = c("All", "Best", "Q1", "Q2", "Q3",
> "Q4"), class = "factor"), cvd_cur_fmt = structure(c(5L, 5L,
> 1L, 4L, 4L), .Label = c("All", "Best", "Q1", "Q2", "Q3",
> "Q4"), class = "factor"), ps_pt = c(1, 1, 2, 1, 2), es_pt = c(8,
> 8, 8, 8, 8), ed_pt = c(21, 21, 18, 17, 18), cvd_median_limit = c(1.057,
> 1.057, 1.048, 1.037, 1.05), cvd_average_beat = c(1.06, 1.06,
> 1.05, 1.04, 1.05), limit = c(0.9, 0.9, 0.9, 0.9, 0.9), sstd_mi = c(FALSE,
> FALSE, FALSE, FALSE, FALSE), sstd_hbp = c(FALSE, FALSE, FALSE,
> FALSE, FALSE), sstd_ptca = c(FALSE, FALSE, FALSE, FALSE,
> FALSE), sstd_cabg = c(TRUE, TRUE, TRUE, TRUE, TRUE), sstd_norm_perf = c(FALSE,
> FALSE, FALSE, FALSE, FALSE), sstd_posnegett = structure(c(NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("-",
> "+"), class = "factor"), sstd_function = structure(c(NA_integer_,
> NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("MODERATE",
> "NORMAL", "POOR", "VERY POOR"), class = "factor"), cvd_cur_fmt_n = c(3,
> 3, NA, 2, 2), cvd_prev_fmt_n = c(NA, NA, NA, 1, NA), cvd_cur_fmt2 = structure(c(3L,
> 3L, 1L, 3L, 3L), .Label = c("All", "Best", "Quartiles"), class = "factor"),
> cvd_prev_fmt2 = structure(c(1L, 2L, 1L, 3L, 2L), .Label = c("All",
> "Best", "Quartiles"), class = "factor"), es_time = c(0.309166666666667,
> 0.309166666666667, 0.2625, 0.303333333333333, 0.2625), es_time_err = c(0.0441666666666667,
> 0.0441666666666667, 0.04375, 0.0433333333333333, 0.04375),
> ed_time = c(0.574166666666667, 0.574166666666667, 0.4375,
> 0.39, 0.4375)), .Names = c("cvd_basestudy", "ecd_rhythm",
> "cvd_frame_mode", "cvd_part_fmt", "cvd_prev_fmt", "cvd_cur_fmt",
> "ps_pt", "es_pt", "ed_pt", "cvd_median_limit", "cvd_average_beat",
> "limit", "sstd_mi", "sstd_hbp", "sstd_ptca", "sstd_cabg", "sstd_norm_perf",
> "sstd_posnegett", "sstd_function", "cvd_cur_fmt_n", "cvd_prev_fmt_n",
> "cvd_cur_fmt2", "cvd_prev_fmt2", "es_time", "es_time_err", "ed_time"
> ), row.names = c("651", "655", "656", "661", "663"), class = "data.frame")
>
>
>
********************************************************************************************************************
>
>
> This message may contain confidential information. If yo...{{dropped:21}}
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>
--
Dimitris Rizopoulos
Assistant Professor
Department of Biostatistics
Erasmus University Medical Center
Address: PO Box 2040, 3000 CA Rotterdam, the Netherlands
Tel: +31/(0)10/7043478
Fax: +31/(0)10/7043014
Web: http://www.erasmusmc.nl/biostatistiek/