thr3ads.net - R help - [R] R relative frequency by date and operator [May 2013]

If this information is useful, please help other people find it:
Share via:

moadeep

2013-May-20 12:30 UTC

[R] R relative frequency by date and operator

I have the data frame with the following structure

   Operator Score     Date Freq
1          A  Crap Apr 2013    0
2          D  Crap Apr 2013    0
3          J  Crap Apr 2013    0
4          L  Crap Apr 2013    0
5          M  Crap Apr 2013    0
6          A  Good Apr 2013    1
7          D  Good Apr 2013   14
8          J  Good Apr 2013   26
9          L  Good Apr 2013    3
10         M  Good Apr 2013    9

I would like to aggregate this data such that I can find the relative
frequency of each score (Good, OK, Poor and Crap) for each combination of
month and operator. For example, for operator A in the month Jan 2013, I
would like the following output:

   Operator Score     Date Freq Rel.Freq
1          A  Crap Jan 2013    0        0
2          A  Poor Jan 2013    5     0.22
3          A  Good Jan 2013   15     0.65
4          A    OK Jan 2013    3     0.13

i.e., I would like to add a relative frequency column to my existing
data.frame. I haven't got anywhere near an automated solution. The closest I
have is:

tmp <- subset(df, Operator == "A")
tmp$N.norm <- tmp$Freq/sum(ans2$Freq)

However, this sums all data for operator A regardless of date. So I would
need to subset again according to date. Is there a straightforward way to do
this in R?



structure(list(Operator = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label =
c("A",
"D", "J", "L", "M"), class =
"factor"), Score = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L), .Label = c("Crap", "Good", "OK",
"Poor"), class = "factor"),
    Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
    ), .Label = c("Apr 2013", "Feb 2013", "Jan 2013",
    "Mar 2013",
    "May 2013"), class = "factor"), Freq = c(0L, 0L, 0L, 0L,
    0L, 1L, 14L, 26L, 3L, 9L, 3L, 5L, 3L, 0L, 6L, 3L, 24L, 20L, 
    29L, 14L, 0L, 0L, 0L, 0L, 0L, 2L, 17L, 24L, 12L, 9L, 2L, 
    10L, 4L, 0L, 4L, 0L, 27L, 36L, 37L, 13L, 0L, 0L, 0L, 0L, 
    0L, 15L, 16L, 20L, 5L, 15L, 3L, 6L, 17L, 3L, 5L, 5L, 31L, 
    12L, 41L, 9L, 0L, 1L, 0L, 0L, 0L, 1L, 8L, 11L, 12L, 17L, 
    1L, 1L, 3L, 4L, 4L, 5L, 16L, 21L, 25L, 15L, 0L, 0L, 0L, 0L, 
    0L, 5L, 7L, 18L, 4L, 3L, 0L, 5L, 2L, 0L, 1L, 1L, 15L, 9L, 
    10L, 9L)), .Names = c("Operator", "Score",
"Date", "Freq"
), row.names = c(NA, -100L), class = "data.frame")



--
View this message in context:
http://r.789695.n4.nabble.com/R-relative-frequency-by-date-and-operator-tp4667498.html
Sent from the R help mailing list archive at Nabble.com.

arun

2013-May-20 13:19 UTC

head link

[R] R relative frequency by date and operator

Hi,
Try either:
dat2<- structure(list(Operator = structure(c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("A",
"D", "J", "L", "M"), class =
"factor"), Score = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L), .Label = c("Crap",
"Good", "OK", "Poor"),
 class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L ), .Label = c("Apr 2013",
"Feb 2013", "Jan 2013", "Mar 2013",
"May 2013"), class = "factor"), Freq = c(0L, 0L, 0L, 0L, 0L,
1L, 14L, 26L, 3L, 9L, 3L, 5L, 3L, 0L, 6L, 3L, 24L, 20L, 29L, 14L, 0L, 0L, 0L,
0L, 0L, 2L, 17L, 24L, 12L, 9L, 2L, 10L, 4L, 0L, 4L, 0L, 27L, 36L, 37L, 13L, 0L,
0L, 0L, 0L, 0L, 15L, 16L, 20L, 5L, 15L, 3L, 6L, 17L, 3L, 5L, 5L, 31L, 12L, 41L,
9L, 0L, 1L, 0L, 0L, 0L, 1L, 8L, 11L, 12L, 17L, 1L, 1L, 3L, 4L, 4L, 5L, 16L, 21L,
25L, 15L, 0L, 0L, 0L, 0L, 0L, 5L, 7L, 18L, 4L, 3L, 0L, 5L, 2L, 0L, 1L, 1L, 15L,
9L, 10L,
 9L)), .Names = c("Operator", "Score", "Date",
"Freq" ), row.names = c(NA, -100L), class = "data.frame")

dat2[,1:3]<- lapply(dat2[,1:3],as.character)
res1<-unsplit(lapply(split(dat2,dat2$Operator),function(x)
{x$Rel.Freq<-round(x$Freq/with(x,ave(Freq,Date,FUN=sum)),2);x}),dat2$Operator)
subset(res1,Operator=="A" & Date=="Jan 2013")
#   Operator Score     Date Freq Rel.Freq
#41        A  Crap Jan 2013    0     0.00
#46        A  Good Jan 2013   15     0.65
#51        A    OK Jan 2013    3     0.13
#56        A  Poor Jan 2013    5     0.22
#or
dat3<- dat2
dat3$Rel.Freq<-round(dat3$Freq/with(dat3,ave(Freq,Operator,Date,FUN=sum)),2)
subset(dat3,Operator=="A" & Date=="Jan 2013")
#   Operator Score     Date Freq Rel.Freq
#41        A  Crap Jan 2013    0     0.00
#46        A  Good Jan 2013   15     0.65
#51        A    OK Jan 2013    3     0.13
#56        A  Poor Jan 2013    5     0.22
row.names(dat3)<- row.names(res1)
identical(dat3,res1)
#[1] TRUE
A.K.

>I have the data frame with the following structure 
>
>   Operator Score     Date Freq
>1          A  Crap Apr 2013    0
>2          D  Crap Apr 2013    0
>3          J  Crap Apr 2013    0
>4          L  Crap Apr 2013    0
>5          M  Crap Apr 2013    0
>6          A  Good Apr 2013    1
>7          D  Good Apr 2013   14
>8          J  Good Apr 2013   26
>9          L  Good Apr 2013    3
>10         M  Good Apr 2013    9
>
>I would like to aggregate this data such that I can find the relative frequency of each score (Good, Ok, Poor and Crap) for each
>combination of month and operator. For example For operator A in the
>month Jan 2013 - I would like the following output
>
>   Operator Score     Date Freq Rel.Freq
>1          A  Crap Jan 2013    0        0
>2          A  Poor Jan 2013    5     0.22
>3          A  Good Jan 2013   15     0.65
>4          A    Ok Jan 2013    3     0.13
>
>i.e I would like to add a relative frequency column to my existing data.frame. I haven't got anywhere near an automated solution.
>The closest I have is
>
>tmp <- subset(df, Operator == "A")
>tmp$N.norm <- tmp$Freq/sum(ans2$Freq)
>
>However this sums all data for operator A regardless of date. So I would need to subset again according to date. Is there a
>straightforward way to do this in R



<nabble_embed>structure(list(Operator = structure(c(1L, 2L, 3L, 4L, 5L,
1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label =
c("A",
"D", "J", "L", "M"), class =
"factor"), Score = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L), .Label = c("Crap", "Good", "OK",
"Poor"), class = "factor"),
    Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
    ), .Label = c("Apr 2013", "Feb 2013", "Jan 2013",
    "Mar 2013",
    "May 2013"), class = "factor"), Freq = c(0L, 0L, 0L, 0L,
    0L, 1L, 14L, 26L, 3L, 9L, 3L, 5L, 3L, 0L, 6L, 3L, 24L, 20L, 
    29L, 14L, 0L, 0L, 0L, 0L, 0L, 2L, 17L, 24L, 12L, 9L, 2L, 
    10L, 4L, 0L, 4L, 0L, 27L, 36L, 37L, 13L, 0L, 0L, 0L, 0L, 
    0L, 15L, 16L, 20L, 5L, 15L, 3L, 6L, 17L, 3L, 5L, 5L, 31L, 
    12L, 41L, 9L, 0L, 1L, 0L, 0L, 0L, 1L, 8L, 11L, 12L, 17L, 
    1L, 1L, 3L, 4L, 4L, 5L, 16L, 21L, 25L, 15L, 0L, 0L, 0L, 0L, 
    0L, 5L, 7L, 18L, 4L, 3L, 0L, 5L, 2L, 0L, 1L, 1L, 15L, 9L, 
    10L, 9L)), .Names = c("Operator", "Score",
"Date", "Freq"
), row.names = c(NA, -100L), class =
"data.frame")</nabble_embed>

R help - May 2013 - R relative frequency by date and operator

[R] R relative frequency by date and operator

[R] R relative frequency by date and operator