Aggregating the mean "%H%M" time of day into weekly bins in R

I have struggled with this for a while. I am new to time-series data and the related R packages. I have a data frame with several variables, including the time of day in GMT ("%H%M") and the date ("%Y/%m/%e"). I want to bin/summarize the data by week (i.e. "%W/%g") and calculate the average time of day at which sampling occurred during each week.

I was able to apply other functions (FUN) to numeric variables (e.g. weight) by first converting my data frame to a zoo object and then using aggregate.zoo as follows:

# Calculate the total weight captured in each week.
# cut(..., "week") bins the index into weeks (starting on Monday by default,
# assuming the zoo index is a Date); as.Date() converts the resulting factor
# labels back into dates so they can serve as the aggregated index.
x2c <- aggregate(OA_zoo, as.Date(cut(time(OA_zoo), "week")), sum)

However, I'm not sure how to get around the fact that here I am working with a date/time format rather than a numeric one, and I would be grateful for any advice! In addition, my code is clearly very repetitive, because I process each of my variables separately. Is there a way to apply different functions (sum / mean / max / min) to my data frame while aggregating by week, using plyr or some other package?

Edit / clarification: Here is the dput output of a sample of my complete dataset. I have data for the period from 2004 to 2011. What I would like to look at and plot with ggplot2 is the average TIME (%H%M), aggregated by week, over the whole period (2004-2011). At the moment my data are not aggregated per week but are daily (random sampling).

> dput(godin)
structure(list(depth = c(878, 1200, 1170, 936, 942, 964, 951, 
953, 911, 969, 960, 987, 991, 997, 1024, 978, 1024, 951, 984, 
931, 1006, 929, 973, 986, 935, 989, 1042, 1015, 914, 984), duration = c(0.8, 
2.6, 6.5, 3.2, 4.1, 6.4, 7.2, 5.3, 7.4, 7, 7, 5.5, 7.5, 7.3, 
7.5, 7, 4.2, 3, 5, 5, 9.3, 7.9, 7.3, 7.2, 7, 5.2, 8, 6, 7.5, 
7), Greenland = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 40L, 28L, 0L, 
0L, 34L, 7L, 28L, 0L, 0L, 0L, 27L, 0L, 0L, 0L, 44L, 59L, 0L, 
0L, 0L, 0L, 0L, 0L), date2 = structure(c(12617, 12627, 12631, 
12996, 12669, 13036, 12669, 13036, 12670, 13036, 12670, 13037, 
12671, 13037, 12671, 13037, 12671, 13038, 12672, 13038, 12672, 
13038, 12672, 13039, 12631, 12997, 12673, 13039, 12673, 13039
), class = "Date"), TIME = c("0940", "0145", "0945", "2045", 
"1615", "0310", "2130", "1045", "0625", "1830", "1520", "0630", 
"0035", "1330", "0930", "2215", "2010", "0645", "0155", "1205", 
"0815", "1845", "2115", "0350", "1745", "0410", "0550", "1345", 
"1515", "2115")), .Names = c("depth", "duration", "Greenland", 
"date2", "TIME"), class = "data.frame", row.names = c("6761", 
"9019", "9020", "9021", "9022", "9023", "9024", "9025", "9026", 
"9027", "9028", "9029", "9030", "9031", "9032", "9033", "9034", 
"9035", "9036", "9037", "9038", "9039", "9040", "9041", "9042", 
"9043", "9044", "9045", "9046", "9047"))
+5
source
3

First, add a column that identifies the week:

# Label each row with its year and week number; %U is the week of the year
# (00-53) counting Sunday as the first day of the week.
godin$week <- format(godin$date2, "%Y-W%U")

This gives labels such as "2004-W26", which work as grouping keys for aggregate.

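Next, convert the HHMM character strings into real time values, so that arithmetic such as a mean can be computed on them.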

# Parse the "HHMM" strings into date-times. Because the format has no date
# part, strptime() fills in the current date, and the local time zone is used.
godin$time2 <- as.POSIXct(strptime(godin$TIME, "%H%M"))

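Note: this is a bit of a hack. strptime() without a date fills in the current date, but since only the time of day is of interest and the date is formatted away at the end, that does not matter here.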

Now the times can be aggregated by week:

# Mean of time2 within each week label (one output row per week).
x2c <- aggregate(time2~week, data=godin, FUN=mean)

(then format away the date part)

# Keep only the time-of-day part of each weekly mean, dropping the
# placeholder date that strptime() filled in.
x2c$time2 <- format(x2c$time2,"%H:%M:%S")

et Voila.

> x2c
      week    time2
1 2004-W29 09:40:00
2 2004-W30 01:45:00
3 2004-W31 13:45:00
4 2004-W36 12:07:00
5 2004-W37 10:32:30
6 2005-W31 12:27:30
7 2005-W36 10:48:20
8 2005-W37 13:11:06

, , R. , .

+3

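Convert TIME to a number (i.e. minutes after midnight). Two small helper functions do the conversion in each direction: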

# Convert clock times written as HHMM (character such as "0940", or numeric
# such as 940) into minutes after midnight. Vectorised over its input.
hour2min <- function(hhmm) {
  clock <- as.numeric(hhmm)
  whole_hours <- trunc(clock / 100)
  leftover_minutes <- clock %% 100
  whole_hours * 60 + leftover_minutes
}
# Convert minutes after midnight back into an HHMM-style number
# (e.g. 580 -> 940). Fractional minutes are carried through unchanged, so
# 711.25 becomes 1151.25. Vectorised over its input.
min2hour <- function(min) {
  total_minutes <- as.numeric(min)
  whole_hours <- trunc(total_minutes / 60)
  leftover_minutes <- total_minutes %% 60
  whole_hours * 100 + leftover_minutes
}

Then aggregate the minutes by week, for example with xts and apply.weekly:

library(xts)
# Minutes after midnight as a one-column xts series ("MINS") indexed by the
# sampling date.
x <- xts(hour2min(godin$TIME), as.Date(godin$date2), dimnames=list(NULL,"MINS"))
# Weekly mean of the minutes column.
w <- apply.weekly(x, mean)
# Convert the weekly means back to HHMM. The fractional part is a decimal
# fraction of a minute, not seconds (1151.250 is 11:51 plus a quarter minute).
w$TIME <- min2hour(w$MINS)
#                MINS     TIME
# 2004-07-18 580.0000  940.000
# 2004-08-01 585.0000  945.000
# 2004-09-12 711.2500 1151.250
# 2005-08-02 747.5000 1227.500
# 2005-09-11 746.6667 1226.667
# 2005-09-13 764.1667 1244.167

Note that the fractional part of TIME is a decimal (hundredths) fraction of a minute, not seconds.

+2

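If you average the complete date-times, the resulting time of day can be misleading (for example, the mean of 19:00 on one day and 19:00 on the next day is 07:00 on the second day):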

# Combine the date and the HHMM time into a single date-time (parsed in the
# local time zone, since no tz is given).
godin$datetime <- as.POSIXct(paste(godin$date2, godin$TIME), format="%Y-%m-%d %H%M")
# Mean date-time within each "week/year" label.
# NOTE(review): %W is the Monday-based week of the year (00-53), whereas %g is
# the two-digit ISO 8601 week-based year. Pairing %V with %g (or %W with %y)
# may be what is intended, because the mixed label can be wrong for dates
# around New Year — confirm before reusing.
aggregate(godin$datetime, list(format(godin$datetime, "%W/%g")), mean)

#  Group.1                   x
#1   28/04 2004-07-18 09:40:00
#2   30/04 2004-07-31 01:45:00
#3   31/05 2005-08-02 00:27:30
#4   36/04 2004-09-10 13:51:15
#5   36/05 2005-09-11 00:26:40
#6   37/05 2005-09-13 00:44:10

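Instead, place every time on the same arbitrary day (here 1970-01-01), so that only the time of day contributes to the mean, and keep using the real date-time only to define the weekly groups.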

# Put every sample time on the same fixed day so that only the time of day
# is averaged.
godin$stime <- as.POSIXct(paste("1970-01-01", godin$TIME), format='%Y-%m-%d %H%M')
# The groups still come from the real sampling date-time (godin$datetime);
# only the averaged values are the fixed-day times.
aggregate(godin$stime, list(format(godin$datetime, "%W/%g")), mean)

#  Group.1                   x
#1   28/04 1970-01-01 09:40:00
#2   30/04 1970-01-01 09:45:00
#3   31/05 1970-01-01 12:27:30
#4   36/04 1970-01-01 11:51:15
#5   36/05 1970-01-01 12:26:40
#6   37/05 1970-01-01 12:44:10

Edit

, @JoshuaUlrich, , . , , 100- . (, - format, .)

# Mean fixed-day time per "week/year" label.
out <- aggregate(godin$stime, list(format(godin$datetime, "%W/%g")), mean)
# Sort chronologically: the labels are "ww/yy", so characters 4-5 (year) and
# 1-2 (week) are pasted into a numeric "yyww" key and the rows ordered by it.
out[order(as.numeric(paste0(substr(out[, 1], 4, 5), substr(out[, 1], 1, 2)))), ]
#  Group.1                   x
#1   28/04 1970-01-01 09:40:00
#2   30/04 1970-01-01 09:45:00
#4   36/04 1970-01-01 11:51:15
#3   31/05 1970-01-01 12:27:30
#5   36/05 1970-01-01 12:26:40
#6   37/05 1970-01-01 12:44:10

2

The format call can also be moved inside aggregate, so that the result comes back directly as %H%M:

# Average the fixed-day times per "week/year" label and format each mean as
# "HHMM" inside the aggregation function, so column x holds character strings.
out <- aggregate(godin$stime, list(format(godin$datetime, "%W/%g")), function(TIME) format(mean(TIME), "%H%M"))
# Order rows chronologically using a numeric "yyww" key built from the
# "ww/yy" labels (characters 4-5 are the year, 1-2 the week).
out[order(as.numeric(paste0(substr(out[, 1], 4, 5), substr(out[, 1], 1, 2)))), ]
#  Group.1    x
#1   28/04 0940
#2   30/04 0945
#4   36/04 1151
#3   31/05 1227
#5   36/05 1226
#6   37/05 1244
+1

All Articles