Add a new column that identifies each individual

My dataset consists of three treatments (C, S, and E) for each individual plant. It looks something like this:

    Year   Cultivar   Site   Distance   Plant   Treat    yield1   yield2
1   2011   Blue       ABR    0m         1       C        0.879    1.5
2   2011   Blue       ABR    0m         1       S        0.384    2.3
3   2011   Blue       ABR    0m         1       E        0.03     0.5
4   2011   Blue       ABR    0m         2       C        0.923    1.2
5   2011   Blue       ABR    0m         2       S        0.344    0.5
6   2011   Blue       ABR    0m         2       E        0.07     0.7
7   2011   Blue       ABR    50m        1       C        0.255    3.4
8   2011   Blue       ABR    50m        1       S        1.00     2.4
9   2011   Blue       ABR    50m        1       E        0.1      0.9
.
.
.

I have data for 2 years, 2 cultivars, 15 sites, 3 distances per site and 10 plants per distance, so I have a lot of data (> 1400 rows). What I want to do is add a new column that assigns a unique ID number to each individual plant. I want my data to look like this:

    Individual  Year   Cultivar   Site   Distance   Plant   Treat    yield1   yield2
1   1           2011   Blue       ABR    0m         1       C        0.879    1.5
2   1           2011   Blue       ABR    0m         1       S        0.384    2.3
3   1           2011   Blue       ABR    0m         1       E        0.03     0.5
4   2           2011   Blue       ABR    0m         2       C        0.923    1.2
5   2           2011   Blue       ABR    0m         2       S        0.344    0.5
6   2           2011   Blue       ABR    0m         2       E        0.07     0.7
7   3           2011   Blue       ABR    50m        1       C        0.255    3.4
8   3           2011   Blue       ABR    50m        1       S        1.00     2.4
9   3           2011   Blue       ABR    50m        1       E        0.1      0.9
.
.
.

R, , , . , "" * * * * , , , .

!

+5
4

Using the `id()` function from plyr:

library(plyr)

# Number every distinct combination of the listed columns and store the
# result as a new `id` column. List whichever columns together identify
# one individual in your data.
df[["id"]] <- id(
  df[c("Year", "Cultivar", "Site", "Distance", "Plant")],
  drop = TRUE
)

# Show the data frame with the new column.
df

 Year Cultivar Site Distance Plant Treat yield1 yield2 id
1 2011     Blue  ABR       0m     1     C  0.879    1.5  1
2 2011     Blue  ABR       0m     1     S  0.384    2.3  1
3 2011     Blue  ABR       0m     1     E  0.030    0.5  1
4 2011     Blue  ABR       0m     2     C  0.923    1.2  2
5 2011     Blue  ABR       0m     2     S  0.344    0.5  2
6 2011     Blue  ABR       0m     2     E  0.070    0.7  2
7 2011     Blue  ABR      50m     1     C  0.255    3.4  3
8 2011     Blue  ABR      50m     1     S  1.000    2.4  3
9 2011     Blue  ABR      50m     1     E  0.100    0.9  3
+4

Using data.table and its special symbol `.GRP`:

`.GRP` is an integer of length 1 containing a simple group counter: 1 for the 1st group, 2 for the 2nd group, and so on.

library(data.table)

# Build a data.table from the existing data frame.
DT <- data.table(df)

# .GRP is data.table's per-group counter, so all rows that share the same
# Year/Cultivar/Site/Distance/Plant combination receive the same number.
# `:=` adds the `grp` column to DT by reference.
DT[, grp := .GRP, by = .(Year, Cultivar, Site, Distance, Plant)]
+4

Solution without using additional packages:

# Build one key string per row from the columns that jointly identify an
# individual. paste() is vectorised over the columns, so no row-wise apply()
# is needed (apply() would first coerce the whole data frame to a character
# matrix). A separator keeps distinct combinations distinct: with an empty
# separator ("AB", "C") and ("A", "BC") would both become "ABC".
# NOTE(review): assumes "\r" never occurs in these columns -- confirm.
df$id <- do.call(
  paste,
  c(df[c("Year", "Cultivar", "Site", "Distance", "Plant")], sep = "\r")
)

# Turn the keys into a factor whose levels follow order of first appearance,
# so the numbering does not depend on the locale's string sort order.
df$id <- factor(df$id, levels = unique(df$id))

# Relabel the levels 1..k. seq_along() also copes with an empty data frame,
# where 1:length(...) would produce c(1, 0).
levels(df$id) <- seq_along(levels(df$id))
+2
source

Not the best solution here, but a solution nonetheless:

library(qdap)

# From the inside out: qcv() supplies the column names given in the
# space-separated string, paste2() pastes those columns together row by row,
# factor() groups identical keys, and as.numeric() takes the factor codes
# as the ids.
df[["id"]] <- as.numeric(
  factor(
    paste2(df[qcv(terms = "Year Cultivar Site Distance Plant")])
  )
)
+1
source

All Articles