Reshape in R without aggregation (e.g. MTurk response rows)

Normally I would use a fairly simple long-to-wide reshape for this, but it seems to alter the aggregation variables. I had a piece of work done on Mechanical Turk, which I ran in triplicate. I want MTurk1, MTurk2 and MTurk3 to each have their own variables in the data frame, uniquely identified by the field that I entered with the task, so that I can compare them with each other using a function later.

Current format:

> head(mturk)
  AssignmentStatus     Input.id  Input.State  Answer.Q1thing
1         Approved       134231           NY         Myguess
2         Approved       134231           NY         Myguess
3         Approved       134231           NY        BadGuess
4        Submitted       134812           CA         Another
5         Approved       134812           CA         Another
6         Approved       134812           CA         Another

I would like it to become:

Input.id   Input.State Answer.Q1thing.1 Answer.Q1thing.2 Answer.Q1thing.3  AssignmentStatus.1 AssignmentStatus.2  AssignmentStatus.3
134231              NY          Myguess          Myguess         BadGuess          Approved             Approved            Approved
134812              CA          Another          Another          Another         Submitted             Approved            Approved

or, ideally, if there is an option that can rename the columns as part of the operation:

Id               State          Answer1          Answer2          Answer3          Status1               Status2             Status3
134231              NY          Myguess          Myguess         BadGuess          Approved             Approved            Approved
134812              CA          Another          Another          Another         Submitted             Approved            Approved

# Asker's attempt: a wide reshape that uses the answer text itself as the
# "time" variable. The idvar entries must match the column names exactly
# (`Input.State`, capital S, as shown in head(mturk)).
dat <- reshape(mturk, timevar = "Answer.Q1thing",
               idvar = c("Input.id", "Input.State"), direction = "wide")

, long-to-wide , , --- , - , 't "MyGuess" "BadGuess" "Another", "Answer.X", . - , , , .

, :

  • ? , , - ?
  • R?
+3
4

With data.table, this can be done as follows:

library(data.table)
mturk.dt <- as.data.table(mturk)

# One result row per (Id, State) group: the group's answers followed by its
# statuses are laid out side by side as a list of single values.
mturk.dt[, as.list(rbind(c(Answer.Q1thing, AssignmentStatus))),
         by = .(Id = Input.id, State = Input.State)]

, by !


To rename the columns, either use setnames afterwards, or use setattr inside j=.. :

:

## `res` is assumed to hold the reshaped data.table from the call above.
## Rename its six columns V1..V6 to Answer1..Answer3, Status1..Status3.
setnames(
  res,
  old = sprintf("V%d", 1:6),
  new = c(sprintf("Answer%d", 1:3), sprintf("Status%d", 1:3))
)

Or, setting the names inside j=..

## Use `as.data.table` instead of `as.list`, to preserve new names.
## Elements are numbered with `seq_along()` rather than `seq()`: `seq(x)` on a
## length-one numeric-mode vector (e.g. a factor column in a single-row group)
## expands to 1:value instead of yielding 1.
mturk.dt[, as.data.table(
         rbind(c(
              setattr(Answer.Q1thing,   "names", paste0("Answer", seq_along(Answer.Q1thing  )))
            , setattr(AssignmentStatus, "names", paste0("Status", seq_along(AssignmentStatus)))
            ))
         )
        , by=list(Id=Input.id, State=Input.State)]

       Id State Answer1 Answer2  Answer3  Status1  Status2  Status3
1: 134231    NY Myguess Myguess BadGuess Approved Approved Approved
2: 134812    CA Myguess Myguess BadGuess Approved Approved Approved
+3

plyr:

# NOTE(review): `dat` is presumably the long-format MTurk data frame (called
# `mturk` in the question) -- confirm before running.
# Each (Input.id, Input.State) group is reduced to its answers as a character
# vector, giving one wide row per group.
res <- ddply(
  dat, .(Input.id, Input.State),
  function(grp) as.character(grp$Answer.Q1thing)
)
setNames(res, c("Id", "State", "Answer1", "Answer2", "Answer3"))
  Id State Answer1 Answer2  Answer3
1 134231    NY Myguess Myguess BadGuess
2 134812    CA Another Another  Another

EDIT

If some groups have fewer than 3 responses:

# Same per-group extraction, but groups with fewer than 3 answers are padded
# with NA up to length 3 (a full group gets zero NAs appended).
res <- ddply(
  dat, .(Input.id, Input.State),
  function(grp) {
    answers <- as.character(grp$Answer.Q1thing)
    c(answers, rep(NA, 3 - length(answers)))
  }
)
+1

, @ @agstudy. , , , "". , /, , , , .

: ; , , .

< 3, , Input.id 2 , NA , .

, :

# Number the responses within each Input.id (1, 2, 3, ...) so that the index,
# not the answer text, serves as the reshape "time" variable.
mturk$idx <- with(mturk, ave(Input.id, Input.id, FUN = seq_along)) # weird!
# idvar must name the columns exactly: `Input.State`, not `Input.state`.
dat <- reshape(mturk, timevar = "idx",
               idvar = c("Input.id", "Input.State"), direction = "wide")

, . ave(), , , . rtl , . ave (x, x, seq_along), -, , . , , count() rtl() temp.

, data.table .

+1

From data.table v1.9.5+, `dcast` can handle multiple `value.var` columns, i.e. we can cast several columns at the same time. We can just do:

# NOTE(review): `dt` is assumed to be the long-format data as a data.table
# (e.g. as.data.table(mturk)) -- confirm.
# Add a running index within each Input.id by reference, then cast both value
# columns across that index.
dt[, id := seq_len(.N), by = .(Input.id)]
dcast(
  dt,
  Input.id + Input.State ~ id,
  value.var = c("AssignmentStatus", "Answer.Q1thing")
)
#    Input.id Input.State 1_AssignmentStatus 2_AssignmentStatus 3_AssignmentStatus
# 1:   134231          NY           Approved           Approved           Approved
# 2:   134812          CA          Submitted           Approved           Approved
#    1_Answer.Q1thing 2_Answer.Q1thing 3_Answer.Q1thing
# 1:          Myguess          Myguess         BadGuess
# 2:          Another          Another          Another

Or all together in one line:

# rowid() numbers the rows within each Input.id in the table's own row order,
# so the index stays aligned with the rows even when equal ids are not stored
# contiguously (a grouped `dt[, seq_len(.N), by = Input.id]$V1` comes back in
# group order instead). Requires data.table >= 1.9.8.
dcast(dt, Input.id + Input.State ~ rowid(Input.id),
      value.var = c("AssignmentStatus", "Answer.Q1thing"))
0
source

All Articles