R:Wilcoxon秩和检验(第二版)

发布时间 2024-01-12 20:28:48作者: 王哲MGG_AI
# 清除所有变量并设置工作目录
rm(list = ls())
setwd("C:\\Users\\Administrator\\Desktop\\新建文件夹 (2)")

# 加载必要的库
library(doBy)

# 读取数据
gene <- read.table('table.l5.relative-SE.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE)
group <- read.table('group.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE)

# 将gene数据转换为长格式
gene_long <- as.data.frame(t(gene))
colnames(gene_long) <- gene_long[1, ]
gene_long <- gene_long[-1, ]
gene_long$sample <- rownames(gene_long)
gene_long <- reshape2::melt(gene_long, id.vars = 'sample', variable.name = 'gene', value.name = 'expression')

# 合并gene和group数据框
gene_grouped <- merge(gene_long, group, by = 'sample')
gene_grouped$expression <- as.numeric(as.character(gene_grouped$expression))

str(gene_grouped)


# 初始化结果变量
result <- NULL

# 进行Wilcoxon秩和检验
for (gene_id in unique(gene_grouped$gene)) {
  sub_data <- subset(gene_grouped, gene == gene_id)
  
  # 确保expression列是数值类型
  sub_data$expression <- as.numeric(as.character(sub_data$expression))
  
  if (length(unique(sub_data$group)) > 1) {
    test_result <- wilcox.test(expression ~ group, data = sub_data)
    if (!is.na(test_result$p.value) && test_result$p.value < 0.05) {
      stats <- summaryBy(expression ~ group, data = sub_data, FUN = c(mean, median))
      result <- rbind(result, c(gene_id, as.character(stats[1, 1]), stats[1, 2], stats[1, 3], as.character(stats[2, 1]), stats[2, 2], stats[2, 3], test_result$p.value))
    }
  }
}


# 将结果转换为数据框并添加列名
result <- as.data.frame(result)
names(result) <- c('gene_id', 'group1', 'mean1', 'median1', 'group2', 'mean2', 'median2', 'p_value')

# 进行p值校正
result$p_adjust <- p.adjust(result$p_value, method = 'BH')

# 输出结果到文件
write.table(result, 'table.l5.relative-SE.wilcox.txt', sep = '\t', row.names = FALSE, quote = FALSE)