Hello all,
I have a very large data frame (more than 5 million lines) as below (dput
example at the end of mail):
Station Antenna Tag DateTime Power Events
1 2 999 22/07/2013 11:00:21 17 1
1 2 999 22/07/2013 11:33:47 31 1
1 2 999 22/07/2013 11:34:00 19 1
1 2 999 22/07/2013 11:34:16 53 1
1 2 999 22/07/2013 11:43:20 15 1
1 2 999 22/07/2013 11:43:35 17 1
To each Tag, in each Antenna, in each Station, I need to create a 10 min
interval and sum the number of Events and mean of Power in the time
interval, as below (complete wanted output at the end of mail).
Station Antenna Tag StartDateTime EndDateTime Power Events
1 2 999 22/07/2013 11:00:21 22/07/2013 11:00:21 17 1
1 2 999 22/07/2013 11:34:16 22/07/2013 11:43:35 27 5
1 2 999 22/07/2013 11:44:35 22/07/2013 11:45:40 17 14
2 1 1 25/07/2013 14:19:45 25/07/2013 14:20:39 65 4
2 1 2 25/07/2013 14:20:13 25/07/2013 14:25:14 21 3
2 1 4 25/07/2013 14:20:46 25/07/2013 14:20:46 28 1
Show start and end points of each interval is optional, not necessary. I
put both to show the irregular time interval: look to Tag 999: first
interval are between 11:00 and 11:10, second between 11:34 and 11:44 and
third are between 11:44 and 11:45.
First I tried a for-loop, without success. After that, I tried this code:
require (plyr)
ddply (data, .(Station, Antenna, Tag, cut(data$DateTime, "10 min")),
summarise, Power = round (mean(Power), 0), Events = sum (Events))
Is almost what I want, because cut() divided in regular time intervals, but
in some cases I do not have this, and it split a unique observation in two.
Any ideas to solve this issue?
R version 3.0.1 (2013-05-16) -- "Good Sport"
Platform: x86_64-w64-mingw32/x64 (64-bit)
Windows 7 Professional
Thanks in advanced,
Raoni
--
Raoni Rosa Rodrigues
Research Associate of Fish Transposition Center CTPeixes
Universidade Federal de Minas Gerais - UFMG
Brasil
rodrigues.raoni@gmail.com
##############################complete data dput
structure(list(Station = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Antenna = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), Tag = c(999L, 999L, 999L, 999L, 999L, 999L,
999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L,
999L, 999L, 999L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 18L, 18L, 18L,
21L, 22L, 36L, 36L, 36L, 36L, 36L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L,
48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L), DateTime = structure(c(3L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 18L, 19L, 19L, 19L, 19L, 20L, 23L, 19L, 17L, 17L,
17L, 23L, 18L, 1L, 1L, 1L, 2L, 2L, 9L, 9L, 10L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L,
15L, 15L, 16L, 16L, 16L, 16L, 18L, 19L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L,
25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 27L, 27L,
27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L), .Label = c("19/06/2013
22:15",
"19/06/2013 22:16", "22/07/2013 11:00", "22/07/2013
11:33", "22/07/2013
11:34",
"22/07/2013 11:43", "22/07/2013 11:44", "22/07/2013
11:45", "25/07/2013
14:10",
"25/07/2013 14:11", "25/07/2013 14:12", "25/07/2013
14:13", "25/07/2013
14:14",
"25/07/2013 14:15", "25/07/2013 14:16", "25/07/2013
14:17", "25/07/2013
14:18",
"25/07/2013 14:19", "25/07/2013 14:20", "25/07/2013
14:21", "25/07/2013
14:23",
"25/07/2013 14:24", "25/07/2013 14:25", "25/07/2013
14:26", "25/07/2013
14:27",
"25/07/2013 14:28", "25/07/2013 14:29", "25/07/2013
14:30"), class "factor"),
Power = c(17L, 31L, 19L, 53L, 15L, 17L, 21L, 12L, 15L, 22L,
19L, 15L, 13L, 14L, 15L, 12L, 23L, 19L, 16L, 20L, 30L, 37L,
25L, 167L, 24L, 14L, 24L, 28L, 31L, 48L, 158L, 18L, 25L,
102L, 101L, 110L, 90L, 58L, 202L, 192L, 179L, 159L, 155L,
184L, 189L, 64L, 203L, 231L, 207L, 171L, 196L, 169L, 169L,
216L, 202L, 242L, 175L, 215L, 156L, 114L, 232L, 210L, 208L,
119L, 206L, 188L, 215L, 210L, 171L, 187L, 189L, 212L, 211L,
206L, 174L, 194L, 160L, 26L, 204L, 198L, 207L, 90L, 162L,
131L, 168L, 179L, 227L, 198L, 131L, 149L, 205L, 209L, 86L,
84L, 175L, 176L, 203L, 192L, 139L, 193L, 197L, 220L, 208L,
155L, 125L, 217L, 114L, 167L, 108L, 208L, 178L, 207L, 210L,
215L, 170L, 200L, 219L, 246L), Events = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("Station",
"Antenna", "Tag", "DateTime", "Power",
"Events"), class = "data.frame",
row.names = c(NA,
-118L))
########################complete desired result dput
structure(list(Station = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), Antenna = c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), Tag = c(999L, 999L, 999L, 1L, 2L, 4L, 18L, 21L,
22L, 36L, 48L, 48L), StartDateTime = structure(c(2L, 3L, 4L,
8L, 9L, 10L, 6L, 12L, 7L, 1L, 5L, 11L), .Label = c("19/06/2013
22:15:49",
"22/07/2013 11:00:21", "22/07/2013 11:34:16",
"22/07/2013 11:44:35",
"25/07/2013 14:10:44", "25/07/2013 14:18:15",
"25/07/2013 14:19:44",
"25/07/2013 14:19:45", "25/07/2013 14:20:13",
"25/07/2013 14:20:46",
"25/07/2013 14:23:24", "25/07/2013 14:25:49"), class =
"factor"),
EndDateTime = structure(c(2L, 3L, 4L, 8L, 10L, 9L, 5L, 11L,
6L, 1L, 7L, 12L), .Label = c("19/06/2013 22:16:29",
"22/07/2013
11:00:21",
"22/07/2013 11:43:35", "22/07/2013 11:45:40",
"25/07/2013 14:18:42",
"25/07/2013 14:19:44", "25/07/2013 14:20:06",
"25/07/2013 14:20:39",
"25/07/2013 14:20:46", "25/07/2013 14:25:14",
"25/07/2013 14:25:49",
"25/07/2013 14:30:59"), class = "factor"), Power =
c(17L,
27L, 17L, 65L, 21L, 28L, 79L, 18L, 25L, 92L, 183L, 177L),
Events = c(1L, 5L, 14L, 4L, 3L, 1L, 3L, 1L, 1L, 5L, 40L,
40L)), .Names = c("Station", "Antenna", "Tag",
"StartDateTime",
"EndDateTime", "Power", "Events"), class =
"data.frame", row.names = c(NA,
-12L))
--
Raoni Rosa Rodrigues
Research Associate of Fish Transposition Center CTPeixes
Universidade Federal de Minas Gerais - UFMG
Brasil
rodrigues.raoni@gmail.com
[[alternative HTML version deleted]]