Turn a data.frame into a single row

I have this data:

structure(list(type = c("journal", "all", "similar_age_1m", "similar_age_3m", 
"similar_age_journal_1m", "similar_age_journal_3m"), count = c("13972", 
"754555", "22408", "56213", "508", "1035"), rank = c("13759", 
"754043", "22339", "56074", "459", "947"), pct = c("98.48", "99.93", 
"99.69", "99.75", "90.35", "91.50")), .Names = c("type", "count", 
"rank", "pct"), row.names = c(NA, -6L), class = "data.frame")

I would like to turn it into a single row, with the names of columns 2:4 prefixed by the corresponding type, for example journal.count, journal.rank, ... What is the fastest way to do this? For some reason, dcast and reshape are not doing it for me, and my own solution is a little too cumbersome.

+5
source share
6 answers

You mentioned reshape2, so here is a way to do it with that package:

library("reshape2")
# Melt to long form (one row per type/variable pair), then cast back to wide.
# The constant `1` on the left-hand side of the formula puts every
# variable/type combination on a single row.
# `id.vars` is spelled out in full rather than relying on partial argument
# matching of `id.var`.
dcast(melt(dat, id.vars = "type"), 1 ~ variable + type)

This gives:

  1 count_all count_journal count_similar_age_1m count_similar_age_3m
1 1    754555         13972                22408                56213
  count_similar_age_journal_1m count_similar_age_journal_3m rank_all
1                          508                         1035   754043
  rank_journal rank_similar_age_1m rank_similar_age_3m
1        13759               22339               56074
  rank_similar_age_journal_1m rank_similar_age_journal_3m pct_all pct_journal
1                         459                         947   99.93       98.48
  pct_similar_age_1m pct_similar_age_3m pct_similar_age_journal_1m
1              99.69              99.75                      90.35
  pct_similar_age_journal_3m
1                      91.50

Note that the variable and the type are separated by _ instead of . (and the variable comes first).

+5
source

Here's another way:

# Flatten every column except the first into one numeric vector (column by
# column) and label each value "<type>.<column>". outer() builds the labels as
# a type-by-column matrix, so flattening it walks the values in the same order.
y <- setNames(
  as.numeric(as.matrix(x[-1])),
  as.vector(outer(x[["type"]], names(x)[-1], function(type, column) {
    paste(type, column, sep = ".")
  }))
)
+2
source

Since you mentioned "reshape", here is a way to do it in base R. First, add a constant id column to your data.frame:

# Give every row the same id so that reshape() treats them as one record and
# spreads each type's count/rank/pct across a single wide row.
mydf[["id"]] <- 1
mydfw <- reshape(mydf, idvar = "id", timevar = "type", direction = "wide")
mydfw
#   id count.journal rank.journal pct.journal count.all rank.all pct.all
# 1  1         13972        13759       98.48    754555   754043   99.93
#   count.similar_age_1m rank.similar_age_1m pct.similar_age_1m
# 1                22408               22339              99.69
#   count.similar_age_3m rank.similar_age_3m pct.similar_age_3m
# 1                56213               56074              99.75
#   count.similar_age_journal_1m rank.similar_age_journal_1m
# 1                          508                         459
#   pct.similar_age_journal_1m count.similar_age_journal_3m
# 1                      90.35                         1035
#   rank.similar_age_journal_3m pct.similar_age_journal_3m
# 1                         947                      91.50

If the column order matters, the columns can be regrouped by variable:

# Regroup the wide columns by variable. For each original column name, keep the
# wide columns that equal it exactly (e.g. "id") or start with "<name>."
# (e.g. "count.journal"). Unlike an unanchored grep(), this cannot pick up a
# column merely because the name occurs somewhere inside it, and the name is
# never interpreted as a regular expression.
mydfw <- mydfw[, unlist(lapply(names(mydf), function(nm) {
  which(names(mydfw) == nm | startsWith(names(mydfw), paste0(nm, ".")))
})), drop = FALSE]
+2
source

Here is a solution using expand.grid to get the names.

To get the data, first subset to remove the first column, which contains the names. Then transpose and convert to numeric.

> # All (column, type) pairs; the column name varies fastest, which matches the
> # order in which the transposed values are flattened below.
> eg <- expand.grid(colnames(x[, -1]), x[, 1])
> structure(as.numeric(t(x[, -1])), names = paste(eg[[2]], eg[[1]], sep = "."))
               journal.count                 journal.rank 
                    13972.00                     13759.00 
                 journal.pct                    all.count 
                       98.48                    754555.00 
                    all.rank                      all.pct 
                   754043.00                        99.93 
        similar_age_1m.count          similar_age_1m.rank 
                    22408.00                     22339.00 
          similar_age_1m.pct         similar_age_3m.count 
                       99.69                     56213.00 
         similar_age_3m.rank           similar_age_3m.pct 
                    56074.00                        99.75 
similar_age_journal_1m.count  similar_age_journal_1m.rank 
                      508.00                       459.00 
  similar_age_journal_1m.pct similar_age_journal_3m.count 
                       90.35                      1035.00 
 similar_age_journal_3m.rank   similar_age_journal_3m.pct 
                      947.00                        91.50 
+1
source
# Assuming your data is called "test".
# Transposing before flattening lays the values out row by row (all columns of
# the first type, then the second type, ...). The values keep their original
# character type.
result <- as.data.frame(
  matrix(t(test[-1]), nrow = 1L),
  stringsAsFactors = FALSE
)
# Build the labels with the column name varying fastest so they line up with
# the row-by-row values above.
names(result) <- as.vector(
  outer(names(test)[-1], unique(test$type), function(column, type) {
    paste(type, column, sep = ".")
  })
)

str(result)
'data.frame':   1 obs. of  18 variables:
 $ journal.count               : chr "13972"
 $ journal.rank                : chr "13759"
 $ journal.pct                 : chr "98.48"
 $ all.count                   : chr "754555"
 $ all.rank                    : chr "754043"
 $ all.pct                     : chr "99.93"
 $ similar_age_1m.count        : chr "22408"
 $ similar_age_1m.rank         : chr "22339"
 $ similar_age_1m.pct          : chr "99.69"
 $ similar_age_3m.count        : chr "56213"
 $ similar_age_3m.rank         : chr "56074"
 $ similar_age_3m.pct          : chr "99.75"
 $ similar_age_journal_1m.count: chr "508"
 $ similar_age_journal_1m.rank : chr "459"
 $ similar_age_journal_1m.pct  : chr "90.35"
 $ similar_age_journal_3m.count: chr "1035"
 $ similar_age_journal_3m.rank : chr "947"
 $ similar_age_journal_3m.pct  : chr "91.50"
+1
source

Assuming your data frame is called dat, here is a solution. It is a little crude and may not be to your liking:

# Flatten the value columns (everything but the first) row by row into a
# one-row data frame. seq_len() is used instead of 1:nrow(dat) so that the row
# index never counts backwards.
dat2 <- data.frame(matrix(
  unlist(lapply(seq_len(nrow(dat)), function(i) dat[i, -1])),
  nrow = 1
))
# Label each value "<type>.<column>" (e.g. "journal.count") using the real
# column names rather than their positions; the column names are recycled once
# per type.
colnames(dat2) <- paste0(
  rep(dat[, 1], each = ncol(dat) - 1), ".", names(dat)[-1]
)
dat2

If it does not have to be a data frame, this may also work:

# Flatten the value columns (everything but the first) row by row into a
# numeric vector. seq_len() is used instead of 1:nrow(dat) so that the row
# index never counts backwards.
dat3 <- as.numeric(
  unlist(lapply(seq_len(nrow(dat)), function(i) dat[i, -1]))
)
# Label each value "<type>.<column>" (e.g. "journal.count") using the real
# column names rather than their positions; the column names are recycled once
# per type.
names(dat3) <- paste0(
  rep(dat[, 1], each = ncol(dat) - 1), ".", names(dat)[-1]
)
dat3
0
source

All Articles