doing t.test for columns for each row in data set

r dataframe statistics t-test hypothesis-test

12,651

Solution 1

The t.test is used to compare two data sets. Collecting two data sets each from three different columns of a matrix can be done like this:

data_a = c(x[,2:4])
data_b = c(x[,4:8])

These two data sets can be evaluated using t.test at this point:

t.test(data_a, data_b)

Collecting the data from three columns each for two different compounds for a given row (amino acid) we modify and add a loop:

x <- matrix(rnorm(24, mean=0, sd=1), 4, ncol=6)
x
           [,1]       [,2]        [,3]      [,4]       [,5]       [,6]
[1,] -0.4810307  0.3996071  0.90663635 0.7487048  0.5787846  2.0231681
[2,] -2.0454921 -0.1225105 -1.04447522 0.9325333 -1.7782776  0.6856150
[3,] -0.3099937  1.2079548 -0.03835271 0.2751349  1.0111554 -0.4862846
[4,] -0.2834953  0.1930481 -0.57968344 0.1204925 -0.5015843  0.3690397

for(i in 1:nrow(x)){
data_a = c(x[i, 1:3])
data_b = c(x[i, 4:6])
print(t.test(data_a, data_b))
}

Solution 2

x$stat <- sapply(1:nrow(x), function(i) t.test(as.numeric(as.character(unlist(x[i,2:4]))), as.numeric(as.character(unlist(x[i,8:10]))))[c("p.value")])

Solution 3

With this fake data:

df <- data.frame(compound = c("alanine ", "arginine", "asparagine", "aspartate"))
df <- matrix(rnorm(12*4), ncol = 12)
colnames(df) <- c("AC-1", "AC-2", "AC-3", "AM-1", "AM-2", "AM-3", "SC-1", "SC-2", "SC-3", "SM-1", "SM-2", "SM-3")
df <- data.frame(compound = c("alanine ", "arginine", "asparagine", "aspartate"), df)
 df
    compound        AC.1        AC.2       AC.3       AM.1       AM.2        AM.3       SC.1       SC.2       SC.3         SM.1
1   alanine   1.18362683 -2.03779314 -0.7217692 -1.7569264 -0.8381042  0.06866567  0.2327702 -1.1558879  1.2077454  0.437707310
2   arginine -0.19610110  0.05361113  0.6478384 -0.1768597  0.5905398 -0.67945600 -0.2221109  1.4032349  0.2387620  0.598236199
3 asparagine  0.02540509  0.47880021 -0.1395198  0.8394257  1.9046667  0.31175358 -0.5626059  0.3596091 -1.0963363 -1.004673116
4  aspartate -1.36397906  0.91380826  2.0630076 -0.6817453 -0.2713498 -2.01074098  1.4619707 -0.7257269  0.2851122 -0.007027878
         SM.2        SM.3
1 -0.08419146  0.14275728
2 -1.44965718 -0.64314509
3  0.37673942 -0.07245741
4  0.52794136  1.62305413

You can do the following to extract (for example) the p-values:

library(zoo)
rollapply(t(df[, -1]), function(x) t.test(x)$p.value, width = 3, by = 3)
          [,1]      [,2]      [,3]      [,4]
[1,] 0.6308340 0.5702970 0.5783582 0.6468241
[2,] 0.2511564 0.8327439 0.1617192 0.2005518
[3,] 0.9026407 0.4309623 0.4156030 0.6441069
[4,] 0.3878145 0.4909217 0.6239915 0.2747601

Solution 4

Another alternative is using a package.

Your data:

df <- rbind(c(27612820, 22338050, 15359640, 19741350, 18726880, 18510800, 10914980, 12071660, 16036180, 16890860, 16066960, 16364300),
        c(7067206, 7172234, 5933320, 136272600, 131596800, 134717600, 6102838, 7186256, 6770344, 140127100, 155341300, 151748000),
        c(3151398, 2141378, 1240904, 11522180, 8907711, 9842342, 1677299, 2265826, 2942991, 11690360, 12552660, 12102620)
        )

df <- data.frame(df)
rownames(df) <- c("alanine", "arginine", "asparagine")
colnames(df) <- c("AC-1", "AC-2", "AC-3", "AM-1", "AM-2", "AM-3", "SC-1", "SC-2", "SC-3", "SM-1", "SM-2", "SM-3")

Then to run a t-test on every row between AC and SC groups:

library(matrixTests)
> row_t_welch(df[,c("AC-1", "AC-2", "AC-3")], df[,c("SC-1", "SC-2", "SC-3")])
           obs.x obs.y obs.tot   mean.x   mean.y mean.diff          var.x         var.y    stderr       df   statistic    pvalue conf.low conf.high alternative mean.null conf.level
alanine        3     3       6 21770170 13007607 8762563.3 37776970798900 7213669482133 3872580.5 2.736945  2.26271945 0.1171389 -4259692  21784819   two.sided         0       0.95
arginine       3     3       6  6724253  6686479   37774.0   471939373529  298723602417  506840.9 3.807645  0.07452832 0.9443398 -1397926   1473474   two.sided         0       0.95
asparagine     3     3       6  2177893  2295372 -117478.7   913496858185  401148784303  661978.3 3.472571 -0.17746605 0.8690016 -2070931   1835973   two.sided         0       0.95

View more solutions

12,651

Author by

Admin

Updated on June 05, 2022

Comments

Admin almost 2 years

I have a set of data x which consists of 12 columns and 167 rows. The first column is compound Id for each row. I want to run a t.test for 3 column as one group and the other 3 groups as the second group, separately for each row. My code is as below but it does not work.

for (i in 1:nrow(x)) {
 function(i)c(compound=i,
    t.test(x[2:4],x[8:10],
      x[x$compound==i, ],
      alternative='two.sided',conf.level=0.95)
    )
}
print(c(compound=i,t.test(x[2:4],x[8:10],x[x$compound==i,],
    alternative='two.sided',conf.level=0.95)))

My intention was doing a t.test for each metabolite (compound) between AC groups and SC groups, these are two group of cells.

compound    AC-1     AC-2     AC-3     AM-1      AM-2      AM-3      SC-1     SC-2     SC-3     SM-1      SM-2      SM-3
alanine     27612820 22338050 15359640 19741350  18726880  18510800  10914980 12071660 16036180 16890860  16066960  16364300
arginine    7067206  7172234  5933320  136272600 131596800 134717600 6102838  7186256  6770344  140127100 155341300 151748000
asparagine  3151398  2141378  1240904  11522180  8907711   9842342   1677299  2265826  2942991  11690360  12552660  12102620

Admin over 9 years

thanks, but I do not know I should write these new data in the loop or before ?
Bryan over 9 years

Collecting the data of three columns from each row, you will want this in the loop. But be sure to include what row you want. The syntax looks like this: data_a = c(x[1,1:2])