I am trying to use ddplyc transformto populate a new variable ( summary_Date) in a data frame with IDand variables Date. The value of the variable is selected based on the length of the fragment, which is estimated using ifelse:
If there are less than five observations for an identifier within a month, I want it to summary_Datebe calculated by rounding the date to the next month (using round_datefrom the package lubridate); if within a month there are more than five observations of the identifier, I want it to summary_Datejust be Date.
require(plyr)
require(lubridate)
test.df <- structure(
list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1
, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2)
, Date = structure(c(-247320000, -246196800, -245073600, -243864000
, -242654400, -241444800, -126273600, -123595200
, -121176000, -118497600, 1359385200, 1359388800
, 1359392400, 1359396000, 1359399600, 1359403200
, 1359406800, 1359410400, 1359414000, 1359417600
, 55598400, 56116800, 58881600, 62078400, 64756800
, 67348800, 69854400, 72964800, 76161600, 79012800
, 1358589600, 1358676000, 1358762400, 1358848800
, 1358935200, 1359021600, 1359108000, 1359194400
, 1359280800, 1359367200), tzone = "GMT"
, class = c("POSIXct", "POSIXt"))
, Val=rnorm(40))
, .Names = c("ID", "Date", "Val"), row.names = c(NA, 40L)
, class = "data.frame")
test.df <- ddply(test.df, .(ID, floor_date(Date, "month")), transform
, summary_Date=as.POSIXct(ifelse(length(ID)<5
, round_date(Date, "month")
,Date)
, origin="1970-01-01 00:00.00"
, tz="GMT")
, length_x = length(ID))
head(test.df,5)
ifelse, , , 'summary_Date', -, , , , . , 3 summary_Date 1962-04-01, 1962-03-27 12:00:00' ( ), summary_Date (1962-03-01) .
EDIT: , data.table, ddply. :
test.df <- ddply(test.df, .(ID, floor_date(Date, "month")), transform
, length_x = length(ID))
test.df <- ddply(test.df, .(ID, floor_date(Date, "month")), transform
, summary_Date=as.POSIXct(ifelse(length_x<5
, round_date(Date, "month")
,Date)
, origin="1970-01-01 00:00.00"
, tz="GMT"))
head(test.df,5)[c(1,3:7)]