Reshaping an array to data.frame

r transform dataframe reshape

37,567

Solution 1

Yes, use adply():

adply(x, c(1,2,3))
   Subject Cond Item Measure1 Measure2 Measure3
1       s1    A    1    -0.93   -0.360   -0.005
2       s2    A    1     0.39    1.043    1.090
3       s3    A    1     0.88    0.330    0.360
4       s4    A    1     0.63   -0.120    0.040
5       s5    A    1     0.86   -0.055    0.090
6       s1    B    1    -0.69    0.070    0.170
7       s2    B    1     1.02    0.670    0.680
8       s3    B    1     0.29    0.480    0.510
9       s4    B    1     0.94    0.002    0.090
10      s5    B    1     0.93    0.008    0.120
11      s1    A    2    -0.01   -0.190   -0.050
12      s2    A    2     0.79   -1.390    0.110
13      s3    A    2     0.32    0.980    0.990
14      s4    A    2     0.14    0.430    0.620
15      s5    A    2     0.13   -0.020    0.130
16      s1    B    2    -0.07   -0.150    0.060
17      s2    B    2    -0.63   -0.080    0.270
18      s3    B    2     0.26    0.740    0.740
19      s4    B    2     0.07    0.960    0.960
20      s5    B    2     0.87    0.440    0.450

Solution 2

Use as.data.frame.table().

d0 <- as.data.frame.table(x)
head(d0)

#   Subject Cond Item     Var4  Freq
# 1      s1    A    1 Measure1 -0.93
# 2      s2    A    1 Measure1  0.39
# 3      s3    A    1 Measure1  0.88
# 4      s4    A    1 Measure1  0.63
# 5      s5    A    1 Measure1  0.86
# 6      s1    B    1 Measure1 -0.69

library(tidyr)
d1 <- pivot_wider(data = d0, names_from = "Var4", values_from = "Freq")
head(d1)

#   Subject Cond Item Measure1 Measure2 Measure3
# 1      s1    A    1    -0.93   -0.360   -0.005
# 2      s1    A    2    -0.01   -0.190   -0.050
# 3      s1    B    1    -0.69    0.070    0.170
# 4      s1    B    2    -0.07   -0.150    0.060
# 5      s2    A    1     0.39    1.043    1.090
# 6      s2    A    2     0.79   -1.390    0.110

Solution 3

df = melt(x) gives you something very similar to what you want. Then you could compute the various measure variables from the different levels of measure.

Using the "reshape2" package, try:

dcast(melt(x), Subject + Cond + Item ~ Var4)

Solution 4

ftable pretty much gets you where you need to be:

y <- ftable(x)
y
#
#                    Measure1 Measure2 Measure3
# Subject Cond Item                            
# s1      A    1       -0.930   -0.360   -0.005
#              2       -0.010   -0.190   -0.050
#         B    1       -0.690    0.070    0.170
#              2       -0.070   -0.150    0.060
# s2      A    1        0.390    1.043    1.090
#              2        0.790   -1.390    0.110
#         B    1        1.020    0.670    0.680
#              2       -0.630   -0.080    0.270
# s3      A    1        0.880    0.330    0.360
#              2        0.320    0.980    0.990
#         B    1        0.290    0.480    0.510
#              2        0.260    0.740    0.740
# s4      A    1        0.630   -0.120    0.040
#              2        0.140    0.430    0.620
#         B    1        0.940    0.002    0.090
#              2        0.070    0.960    0.960
# s5      A    1        0.860   -0.055    0.090
#              2        0.130   -0.020    0.130
#         B    1        0.930    0.008    0.120
#              2        0.870    0.440    0.450

But, most people would probably prefer their data in a data.frame. Using as.data.frame.matrix extracts the values, but not the row and column names. ftable stores that information in row.vars and col.vars attributes.

attributes(y)$row.vars
# $Subject
# [1] "s1" "s2" "s3" "s4" "s5"
# 
# $Cond
# [1] "A" "B"
# 
# $Item
# [1] "1" "2"

attributes(y)$col.vars
# [[1]]
# [1] "Measure1" "Measure2" "Measure3"

We can use this information to write a function that converts an ftable to a data.frame:

ftable2df <- function(mydata) {
  ifelse(class(mydata) == "ftable", 
         mydata <- mydata, mydata <- ftable(mydata))
  dfrows <- rev(expand.grid(rev(attr(mydata, "row.vars"))))
  dfcols <- as.data.frame.matrix(mydata)
  names(dfcols) <- do.call(
    paste, c(rev(expand.grid(rev(attr(mydata, "col.vars")))), sep = "_"))
  cbind(dfrows, dfcols)
}

Here it is in use directly on your original "x":

ftable2df(x)
#    Subject Cond Item Measure1 Measure2 Measure3
# 1       s1    A    1    -0.93   -0.360   -0.005
# 2       s1    A    2    -0.01   -0.190   -0.050
# 3       s1    B    1    -0.69    0.070    0.170
# 4       s1    B    2    -0.07   -0.150    0.060
# 5       s2    A    1     0.39    1.043    1.090
# 6       s2    A    2     0.79   -1.390    0.110
# 7       s2    B    1     1.02    0.670    0.680
# 8       s2    B    2    -0.63   -0.080    0.270
# 9       s3    A    1     0.88    0.330    0.360
# 10      s3    A    2     0.32    0.980    0.990
# 11      s3    B    1     0.29    0.480    0.510
# 12      s3    B    2     0.26    0.740    0.740
# 13      s4    A    1     0.63   -0.120    0.040
# 14      s4    A    2     0.14    0.430    0.620
# 15      s4    B    1     0.94    0.002    0.090
# 16      s4    B    2     0.07    0.960    0.960
# 17      s5    A    1     0.86   -0.055    0.090
# 18      s5    A    2     0.13   -0.020    0.130
# 19      s5    B    1     0.93    0.008    0.120
# 20      s5    B    2     0.87    0.440    0.450

View more solutions

37,567

Author by

Amyunimus

I'm an assistant professor at University of Wisconsin-Madison studying visual cognition. I'm interested in what visual information we can acquire without conscious awareness, and how awareness contributes to representing information in the brain. I'm also interested in disambiguating how neural populations encode different types of information using methods like fMRI adaptation and multi-voxel pattern analysis.

Updated on February 04, 2021

Comments

Amyunimus about 3 years

I have the following data structure (an "atomic vector?") output from daply in plyr, in which I had the function return three different measures for each subject, condition, and item.

x = structure(c(-0.93, 0.39, 0.88, 0.63, 0.86, -0.69, 1.02, 0.29, 0.94, 
0.93, -0.01, 0.79, 0.32, 0.14, 0.13, -0.07, -0.63, 0.26, 0.07, 0.87,
-0.36, 1.043, 0.33, -0.12, -0.055, 0.07, 0.67, 0.48, 0.002, 0.008, 
-0.19, -1.39, 0.98, 0.43, -0.02, -0.15,-0.08, 0.74, 0.96, 0.44, -0.005,
1.09, 0.36, 0.04, 0.09, 0.17, 0.68, 0.51, 0.09, 0.12, -0.05, 0.11,
0.99, 0.62, 0.13, 0.06, 0.27, 0.74, 0.96, 0.45), .Dim = c(5L, 
2L, 2L, 3L), .Dimnames = structure(list(Subject = c("s1", "s2", 
"s3", "s4", "s5"), Cond = c("A", "B"), Item = c("1", "2"), c("Measure1", 
"Measure2", "Measure3")), .Names = c("Subject", "Cond", 
"Item", "")))

I want to change it to look like:

Subject Cond Item Measure1 Measure2 Measure3
     s1    A    1    -0.93   -0.360   -0.005
     s1    A    2    -0.01    -0.19    -0.05 
     s1    B    1    -0.69    0.070     0.17
     s1    B    2    -0.07    -0.15     0.06
     s2    A    1     0.39    1.043    1.090
     s2    A    2     0.79    -1.39     0.11
     s2    B    1     1.02    0.670     0.68
     s2    B    2    -0.63    -0.08     0.27

etc.

Is there an easy way to do this?