Find rows with a given difference between values in a column

For a data.table (or data.frame) in R, I want to find all rows whose value in the "value" column lies a given distance ("distance") away from the value in another row with the same key. So, given the following:

distance <- 22
   key value
   A     1
   B     1
   C     1
   D     1
   A     4
   B     4
   A    23
   B    23
   B    26
   B    26
   C    30

I would like to annotate the source table with a count, for each row, of the rows that have the same key and a value exactly 22 greater (value + distance):

  key value count
  A     1     1
  B     1     1
  C     1     0
  D     1     0
  A     4     0
  B     4     2
  A    23     0
  B    23     0
  B    26     0
  B    26     0
  C    30     0

I don’t know where to start with this self-referential approach to data manipulation in R. My initial attempts included creating a second table and trying to match it, but it seemed like a strange and bad approach.

Note. I use the package data.table, but I am happy to work with data.frame in this case, if this makes the process easier.

Reproducibility:

# Load data.table. library() stops with an error if the package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(data.table)
# Input table: one row per (key, value) observation.
source <- data.table(data.frame(key=c("A","B","C","D","A","B","A","B","B","B", "C"),value=c(1,1,1,1,4,4,23,23,26,26,30)))
# Expected output: the input plus `count`, the number of rows with the same
# key whose value is exactly 22 greater than this row's value.
result <- data.table(data.frame(key=c("A","B","C","D","A","B","A","B","B","B","C"),value=c(1,1,1,1,4,4,23,23,26,26,30),count=c(1,1,0,0,0,2,0,0,0,0,0)))
+5
2

data.table handles this well. First, though, a few comments on your setup code (reproduced below).

# Your code
# The asker's table construction, quoted for reference: the vectors are first
# wrapped in data.frame() and then in data.table(), and the columns are named
# `key` and `value`.
library(data.table)
source <- 
data.table(data.frame(key = c("A","B","C","D","A","B","A","B","B","B", "C"),
                      value = c(1,1,1,1,4,4,23,23,26,26,30)))

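Instead of data.table(data.frame(...)), call data.table() directly. Also avoid naming a column key, because data.table attaches its own meaning to "key". So, here is a cleaned-up version: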

# Build the table directly with data.table(); the columns are capitalised
# (Key, Value) so that no column is called `key`.
source <- data.table(
  Key = c("A", "B", "C", "D", "A", "B", "A", "B", "B", "B", "C"),
  Value = c(1, 1, 1, 1, 4, 4, 23, 23, 26, 26, 30)
)

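Next, use as.integer() to convert Value from numeric to integer. Remember that 1 is numeric in R; to get an integer you have to write 1L. The join below looks up Value + distance in an integer column, and an integer plus an integer stays an integer, whereas an integer plus a numeric becomes numeric. That is why distance is written with the L suffix.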

# Convert Value in place from numeric (double) to integer.
source[, Value := as.integer(Value)]

# The L suffix makes the offset an integer too, so Value + distance stays
# integer.
distance <- 22L

# Key the table on (Key, Value); this also sorts its rows by those columns.
setkey(source, Key, Value)

# Heart of the solution (following a few explanatory comments):
#  "J()"         : shorthand for 'data.table()'; builds one lookup row
#                  (Key, Value + distance) per row of 'source'
#  ".N"          : the number of rows in the current group (see ?data.table)
#  "by = .EACHI" : evaluate '.N' once for EACH row of 'i', i.e. once per lookup
#                  row. Since data.table 1.9.4 this must be requested
#                  explicitly; without it '.N' is a single grand total.
#  "$N"          : extracts the vector of per-row counts from the join result
#                  (unmatched lookup rows get a count of 0)

source[, count := source[J(Key, Value + distance), .N, by = .EACHI]$N]
source
      Key Value count
 [1,]   A     1     1
 [2,]   A     4     0
 [3,]   A    23     0
 [4,]   B     1     1
 [5,]   B     4     2
 [6,]   B    23     0
 [7,]   B    26     0
 [8,]   B    26     0
 [9,]   C     1     0
[10,]   C    30     0
[11,]   D     1     0

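Note that := modifies source by reference, so no reassignment is needed. setkey(), however, reorders the rows of source. If you want to keep the original row order, do the lookup against a keyed copy instead: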

# Rebuild the table in its original row order.
source <- data.table(
  Key = c("A", "B", "C", "D", "A", "B", "A", "B", "B", "B", "C"),
  Value = c(1, 1, 1, 1, 4, 4, 23, 23, 26, 26, 30)
)
# Store Value as integer so that Value + distance (distance is the integer
# offset 22L defined earlier) is an integer as well.
source[, Value := as.integer(Value)]
# Look each row's (Key, Value + distance) up in a keyed COPY of the table, so
# that 'source' itself is never re-sorted. 'by = .EACHI' makes '.N' the match
# count for each lookup row (required since data.table 1.9.4; without it '.N'
# is one grand total), and '$N' extracts those counts in the order of 'source'.
source[, count := setkey(copy(source), Key, Value)[
  source[, list(Key, Value + distance)], .N, by = .EACHI
]$N]

      Key Value count
 [1,]   A     1     1
 [2,]   B     1     1
 [3,]   C     1     0
 [4,]   D     1     0
 [5,]   A     4     0
 [6,]   B     4     2
 [7,]   A    23     0
 [8,]   B    23     0
 [9,]   B    26     0
[10,]   B    26     0
[11,]   C    30     0
+5

An alternative using mapply:

# For every row, count the rows that share its key and whose value is exactly
# `distance` greater. The comparison must be `==`: `>` would count every larger
# value and cannot reproduce the expected counts. The values here are whole
# numbers, so exact equality is safe.
# The original columns are reused as-is instead of being packed through c(),
# which coerced key, value and count to character.
data.table(
  key = source$key,
  value = source$value,
  count = mapply(
    function(key, val) sum(source$key == key & source$value == val + distance),
    as.character(source$key),
    source$value,
    USE.NAMES = FALSE
  )
)
+1

All Articles