Hi,
Try either:
# Example data: frequency of each Score per Operator and month.
# 5 operators x 4 scores x 5 months = 100 rows. Operator varies fastest,
# then Score, then Date. The first three columns are factors.
# The level labels are written on single lines: wrapping a string literal
# across lines would embed a newline in the label (e.g. "Apr\n2013").
dat2 <- data.frame(
  Operator = gl(5, 1, 100, labels = c("A", "D", "J", "L", "M")),
  Score = gl(4, 5, 100, labels = c("Crap", "Good", "OK", "Poor")),
  Date = gl(
    5, 20,
    labels = c("Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013", "May 2013")
  ),
  Freq = c(
    # Operator:  A    D    J    L    M
    # Apr 2013
    0L, 0L, 0L, 0L, 0L,       # Crap
    1L, 14L, 26L, 3L, 9L,     # Good
    3L, 5L, 3L, 0L, 6L,       # OK
    3L, 24L, 20L, 29L, 14L,   # Poor
    # Feb 2013
    0L, 0L, 0L, 0L, 0L,       # Crap
    2L, 17L, 24L, 12L, 9L,    # Good
    2L, 10L, 4L, 0L, 4L,      # OK
    0L, 27L, 36L, 37L, 13L,   # Poor
    # Jan 2013
    0L, 0L, 0L, 0L, 0L,       # Crap
    15L, 16L, 20L, 5L, 15L,   # Good
    3L, 6L, 17L, 3L, 5L,      # OK
    5L, 31L, 12L, 41L, 9L,    # Poor
    # Mar 2013
    0L, 1L, 0L, 0L, 0L,       # Crap
    1L, 8L, 11L, 12L, 17L,    # Good
    1L, 1L, 3L, 4L, 4L,       # OK
    5L, 16L, 21L, 25L, 15L,   # Poor
    # May 2013
    0L, 0L, 0L, 0L, 0L,       # Crap
    5L, 7L, 18L, 4L, 3L,      # Good
    0L, 5L, 2L, 0L, 1L,       # OK
    1L, 15L, 9L, 10L, 9L      # Poor
  )
)
# Convert the three factor columns (Operator, Score, Date) to character.
dat2[, 1:3] <- lapply(dat2[, 1:3], as.character)

# Approach 1: split the data by Operator, and within each piece divide Freq
# by the per-Date total, then put the pieces back in the original row order.
# Note: a group whose Freq values sum to 0 yields NaN (0 / 0).
res1 <- unsplit(
  lapply(split(dat2, dat2$Operator), function(x) {
    x$Rel.Freq <- round(x$Freq / with(x, ave(Freq, Date, FUN = sum)), 2)
    x
  }),
  dat2$Operator
)
subset(res1, Operator == "A" & Date == "Jan 2013")
#    Operator Score     Date Freq Rel.Freq
# 41        A  Crap Jan 2013    0     0.00
# 46        A  Good Jan 2013   15     0.65
# 51        A    OK Jan 2013    3     0.13
# 56        A  Poor Jan 2013    5     0.22
#or
# Approach 2: no split needed. ave() accepts several grouping variables, so
# the total for each Operator/Date combination is computed in one call.
# Note: a group whose Freq values sum to 0 yields NaN (0 / 0).
dat3 <- dat2
dat3$Rel.Freq <- round(
  dat3$Freq / with(dat3, ave(Freq, Operator, Date, FUN = sum)),
  2
)
subset(dat3, Operator == "A" & Date == "Jan 2013")
#    Operator Score     Date Freq Rel.Freq
# 41        A  Crap Jan 2013    0     0.00
# 46        A  Good Jan 2013   15     0.65
# 51        A    OK Jan 2013    3     0.13
# 56        A  Poor Jan 2013    5     0.22

# Check that both approaches agree. res1's row names are copied over first
# so identical() compares like with like (presumably unsplit() stores them
# differently -- confirm if this matters to you).
row.names(dat3) <- row.names(res1)
identical(dat3, res1)
# [1] TRUE
A.K.
>I have the data frame with the following structure
>
>   Operator Score     Date Freq
>1         A  Crap Apr 2013    0
>2         D  Crap Apr 2013    0
>3         J  Crap Apr 2013    0
>4         L  Crap Apr 2013    0
>5         M  Crap Apr 2013    0
>6         A  Good Apr 2013    1
>7         D  Good Apr 2013   14
>8         J  Good Apr 2013   26
>9         L  Good Apr 2013    3
>10        M  Good Apr 2013    9
>
>I would like to aggregate this data such that I can find the
>relative frequency of each score (Good, OK, Poor and Crap) for each
>combination of month and operator. For example, for operator A in the
>month Jan 2013, I would like the following output:
>  Operator Score     Date Freq Rel.Freq
>1        A  Crap Jan 2013    0     0
>2        A  Poor Jan 2013    5     0.22
>3        A  Good Jan 2013   15     0.65
>4        A    OK Jan 2013    3     0.13
>
>i.e. I would like to add a relative frequency column to my
>existing data.frame. I haven't got anywhere near an automated solution.
>The closest I have is:
>tmp <- subset(df, Operator == "A")
>tmp$N.norm <- tmp$Freq/sum(ans2$Freq)
>
>However, this sums all data for operator A regardless of date, so
>I would need to subset again according to date. Is there a
>straightforward way to do this in R?
<nabble_embed>structure(list(Operator = structure(c(1L, 2L, 3L, 4L, 5L,
1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label =
c("A",
"D", "J", "L", "M"), class =
"factor"), Score = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L), .Label = c("Crap", "Good", "OK",
"Poor"), class = "factor"),
Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("Apr 2013", "Feb 2013", "Jan 2013", "Mar 2013",
"May 2013"), class = "factor"), Freq = c(0L, 0L, 0L, 0L,
0L, 1L, 14L, 26L, 3L, 9L, 3L, 5L, 3L, 0L, 6L, 3L, 24L, 20L,
29L, 14L, 0L, 0L, 0L, 0L, 0L, 2L, 17L, 24L, 12L, 9L, 2L,
10L, 4L, 0L, 4L, 0L, 27L, 36L, 37L, 13L, 0L, 0L, 0L, 0L,
0L, 15L, 16L, 20L, 5L, 15L, 3L, 6L, 17L, 3L, 5L, 5L, 31L,
12L, 41L, 9L, 0L, 1L, 0L, 0L, 0L, 1L, 8L, 11L, 12L, 17L,
1L, 1L, 3L, 4L, 4L, 5L, 16L, 21L, 25L, 15L, 0L, 0L, 0L, 0L,
0L, 5L, 7L, 18L, 4L, 3L, 0L, 5L, 2L, 0L, 1L, 1L, 15L, 9L,
10L, 9L)), .Names = c("Operator", "Score",
"Date", "Freq"
), row.names = c(NA, -100L), class =
"data.frame")</nabble_embed>