I have two data frames; one is 48 lines long and looks like this:
name = Z31
Est.Date Site Cultivar Planting
1 24/07/2011 Birchip Axe 1
2 08/08/2011 Birchip Bolac 1
3 24/07/2011 Birchip Derrimut 1
4 12/08/2011 Birchip Eaglehawk 1
5 29/07/2011 Birchip Gregory 1
6 29/07/2011 Birchip Lincoln 1
7 23/07/2011 Birchip Mace 1
8 29/07/2011 Birchip Scout 1
9 17/09/2011 Birchip Axe 2
10 19/09/2011 Birchip Bolac 2
Another → 23,000 lines and contains the output from the simulator. It looks like this:
name = pred
Date maxt mint Cultivar Site Planting tt cum_tt
1 5/05/2011 18 6.5 Axe Birchip 1 12.25 0
2 6/05/2011 17.5 2.5 Axe Birchip 1 10 0
3 7/05/2011 18 2.5 Axe Birchip 1 10.25 0
4 8/05/2011 19.5 2 Axe Birchip 1 10.75 0
5 9/05/2011 17 4.5 Axe Birchip 1 10.75 0
6 10/05/2011 15.5 -0.5 Axe Birchip 1 7.5 0
7 11/05/2011 14 5.5 Axe Birchip 1 9.75 0
8 12/05/2011 19 8 Axe Birchip 1 13.5 0
9 13/05/2011 18.5 7.5 Axe Birchip 1 13 0
10 14/05/2011 16 3.5 Axe Birchip 1 9.75 0
I want the cum_tt column to start adding the tt column of the current row to the cum_tt of the previous row (cumulative addition) ONLY if the date in pre-DF is equal to or greater than Z31 est.Date. I wrote the following loop:
for (i in 1:nrow(Z31)){
for (j in 1:nrow(pred)){
if (Z31[i,]$Site == pred[j,]$Site & Z31[i,]$Cultivar == pred[j,]$Cultivar & Z31[i,]$Planting == pred[j,]$Planting &
pred[j,]$Date >= Z31[i,]$Est.Date)
{
pred[j,]$cum_tt <- pred[j,]$tt + pred[j-1,]$cum_tt
}
}
}
It works, but it is so slow that it takes about an hour to start the whole set. I know that loops are not strong, so can someone help me with vectorizing this operation?
Thanks in advance.
UPDATE
Here is the output from dput (Z31):
structure(list(Est.Date = structure(c(15179, 15194, 15179, 15198,
15184, 15184, 15178, 15184, 15234, 15236, 15230, 15238, 15229,
15236, 15229, 15231, 15155, 15170, 15160, 15168, 15165, 15159,
15170, 15170, 15191, 15205, 15198, 15203, 15202, 15195, 15203,
15206, 15193, 15193, 15195, 15200, 15193, 15205, 15200, 15205,
15226, 15245, 15231, 15259, 15241, 15241, 15241, 15241), class = "Date"),
Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Birchip", "Gatton",
"Tarlee"), class = "factor"), Cultivar = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L), .Label = c("Axe", "Bolac", "Derrimut", "Eaglehawk",
"Gregory", "Lincoln", "Mace", "Scout"), class = "factor"),
Planting = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L)), .Names = c("Est.Date", "Site",
"Cultivar", "Planting"), row.names = c(NA, -48L), class = "data.frame")
Here before. Please note that there are several additional columns. I just included the relevant ones above for readability.
structure(list(Date = structure(c(15099, 15100, 15101, 15102,
15103, 15104, 15105, 15106, 15107, 15108, 15109, 15110, 15111,
15112, 15113, 15114, 15115, 15116, 15117, 15118), class = "Date"),
flowering_das = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Zadok = c(9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 11.032, 11.085,
11.157), stagename = structure(c(8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 9L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 1L, 3L, 3L, 3L), .Label = c("emergence",
"end_grain_fill", "end_of_juvenil", "floral_initiat", "flowering",
"germination", "maturity", "out", "sowing", "start_grain_fi"
), class = "factor"), node_no = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2, 2.032, 2.085, 2.157), maxt = c(18,
17.5, 18, 19.5, 17, 15.5, 14, 19, 18.5, 16, 16, 15, 16.5,
16.5, 20.5, 23, 25.5, 16.5, 16.5, 15), mint = c(6.5, 2.5,
2.5, 2, 4.5, -0.5, 5.5, 8, 7.5, 3.5, 6, 1, 5.5, 2, 7, 7,
9, 13.5, 11.5, 8.5), Cultivar = c("Axe", "Axe", "Axe", "Axe",
"Axe", "Axe", "Axe", "Axe", "Axe", "Axe", "Axe", "Axe", "Axe",
"Axe", "Axe", "Axe", "Axe", "Axe", "Axe", "Axe"), Site = c("Birchip",
"Birchip", "Birchip", "Birchip", "Birchip", "Birchip", "Birchip",
"Birchip", "Birchip", "Birchip", "Birchip", "Birchip", "Birchip",
"Birchip", "Birchip", "Birchip", "Birchip", "Birchip", "Birchip",
"Birchip"), Planting = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1"), `NA` = c("Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out",
"Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out", "Birchip TOS1 Axe.out"
), tt = c(12.25, 10, 10.25, 10.75, 10.75, 7.5, 9.75, 13.5,
13, 9.75, 11, 8, 11, 9.25, 13.75, 15, 17.25, 15, 14, 11.75
), cum_tt = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0)), .Names = c("Date", "flowering_das", "Zadok",
"stagename", "node_no", "maxt", "mint", "Cultivar", "Site", "Planting",
NA, "tt", "cum_tt"), row.names = c(NA, 20L), class = "data.frame")
UPDATE
. , . , Subs. , , . Z P.
: 59.77
Subs: 14.62
Subs : 11.12