# 清除所有变量并设置工作目录
rm(list = ls())
setwd("C:\\Users\\Administrator\\Desktop\\新建文件夹 (2)")
# 加载必要的库
library(doBy)
# 读取数据
gene <- read.table('table.l5.relative-SE.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE)
group <- read.table('group.txt', sep = '\t', header = TRUE, stringsAsFactors = FALSE)
# 将gene数据转换为长格式
gene_long <- as.data.frame(t(gene))
colnames(gene_long) <- gene_long[1, ]
gene_long <- gene_long[-1, ]
gene_long$sample <- rownames(gene_long)
gene_long <- reshape2::melt(gene_long, id.vars = 'sample', variable.name = 'gene', value.name = 'expression')
# 合并gene和group数据框
gene_grouped <- merge(gene_long, group, by = 'sample')
gene_grouped$expression <- as.numeric(as.character(gene_grouped$expression))
str(gene_grouped)
# 初始化结果变量
result <- NULL
# 进行Wilcoxon秩和检验
for (gene_id in unique(gene_grouped$gene)) {
sub_data <- subset(gene_grouped, gene == gene_id)
# 确保expression列是数值类型
sub_data$expression <- as.numeric(as.character(sub_data$expression))
if (length(unique(sub_data$group)) > 1) {
test_result <- wilcox.test(expression ~ group, data = sub_data)
if (!is.na(test_result$p.value) && test_result$p.value < 0.05) {
stats <- summaryBy(expression ~ group, data = sub_data, FUN = c(mean, median))
result <- rbind(result, c(gene_id, as.character(stats[1, 1]), stats[1, 2], stats[1, 3], as.character(stats[2, 1]), stats[2, 2], stats[2, 3], test_result$p.value))
}
}
}
# 将结果转换为数据框并添加列名
result <- as.data.frame(result)
names(result) <- c('gene_id', 'group1', 'mean1', 'median1', 'group2', 'mean2', 'median2', 'p_value')
# 进行p值校正
result$p_adjust <- p.adjust(result$p_value, method = 'BH')
# 输出结果到文件
write.table(result, 'table.l5.relative-SE.wilcox.txt', sep = '\t', row.names = FALSE, quote = FALSE)