R:Alpha多样性与箱线图

发布时间 2023-11-24 15:25:14作者: 王哲MGG_AI

以上是学习的源头,载入了自定义包,但是有修改颜色的需求,只能自己重新定义函数。

rm (list = ls ())
setwd("C:\\Users\\Administrator\\Desktop\\alpha多样性_箱线图")
library(devtools)
alpha_boxplot_custom_color <- function(alpha_div, metadata, index = "richness", groupID = "Group", 
                                       outlier = TRUE, colors = c("#E9967A", "#FFA07A", "#FF7F50", "#FF4500")) 
{
  p_list = c("ggplot2", "dplyr", "multcompView")
  for (p in p_list) {
    if (!requireNamespace(p)) {
      install.packages(p)
    }
    suppressPackageStartupMessages(library(p, character.only = TRUE, 
                                           quietly = TRUE, warn.conflicts = FALSE))
  }
  idx = rownames(metadata) %in% rownames(alpha_div)
  metadata = metadata[idx, , drop = F]
  alpha_div = alpha_div[rownames(metadata), ]
  sampFile = as.data.frame(metadata[, groupID], row.names = row.names(metadata))
  df = cbind(alpha_div[rownames(sampFile), index], sampFile)
  colnames(df) = c(index, "group")
  model = aov(df[[index]] ~ group, data = df)
  Tukey_HSD = TukeyHSD(model, ordered = TRUE, conf.level = 0.95)
  Tukey_HSD_table = as.data.frame(Tukey_HSD$group)
  #Tukey_HSD_table$p_value <- Tukey_HSD$group[, "p adj"]
  #options(digits=10)
  #if (Tukey_HSD_table$p_value < 0.0001) {
    #print("The p-values are: p<0.0001")
  #} else {
    # 打印p值
   # print(paste("The p-values are: ", Tukey_HSD_table$p_value))
 # }
  write.table(paste(date(), "\nGroup\t", groupID, "\n\t", sep = ""), 
              file = paste("alpha_boxplot_TukeyHSD.txt", sep = ""), 
              append = T, quote = F, eol = "", row.names = F, col.names = F)
  suppressWarnings(write.table(Tukey_HSD_table, file = paste("alpha_boxplot_TukeyHSD.txt", 
                                                             sep = ""), append = T, quote = F, sep = "\t", eol = "\n", 
                               na = "NA", dec = ".", row.names = T, col.names = T))
  generate_label_df = function(TUKEY, variable) {
    Tukey.levels = TUKEY[[variable]][, 4]
    Tukey.labels = data.frame(multcompLetters(Tukey.levels)["Letters"])
    Tukey.labels$group = rownames(Tukey.labels)
    Tukey.labels = Tukey.labels[order(Tukey.labels$group), 
    ]
    return(Tukey.labels)
  }
  if (length(unique(df$group)) == 2) {
    library(agricolae)
    out = LSD.test(model, "group", p.adj = "none")
    stat = out$groups
    df$stat = stat[as.character(df$group), ]$groups
  }
  else {
    LABELS = generate_label_df(Tukey_HSD, "group")
    df$stat = LABELS[as.character(df$group), ]$Letters
  }
  max = max(df[, c(index)])
  min = min(df[, index])
  x = df[, c("group", index)]
  y = x %>% group_by(group) %>% summarise_(Max = paste("max(", 
                                                       index, ")", sep = ""))
  y = as.data.frame(y)
  rownames(y) = y$group
  df$y = y[as.character(df$group), ]$Max + (max - min) * 0.05
  # 设置分组的顺序
  levels_order <- c("Mo17_W4", "Mo17_W6", "Mo17_W8", "Mo17_W10")
  
  # 使用factor函数来设置分组的顺序
  df$group <- factor(df$group, levels = levels_order)
  if (outlier) {
    p = ggplot(df, aes(x = group, y = .data[[index]], color = group)) + 
      geom_boxplot(alpha = 1, size = 0.7, width = 0.5, 
                   fill = "transparent") + labs(x = "Groups", y = paste(index, 
                                                                        "index"), color = groupID) + theme_classic() + geom_text(data = df, 
                                                                                                                                 aes(x = group, y = y, label = stat), color = "black", alpha = 0.1, size = 2.4, lineheight = 0.5) + 
      geom_jitter(position = position_jitter(0.17), size = 1, 
                  alpha = 0.7) + theme(text = element_text(family = "sans", 
                                                           size = 7)) +
      scale_color_manual(values = colors)  # 添加这一行来指定颜色
    p
  }
  else {
    p = ggplot(df, aes(x = group, y = .data[[index]], color = "group")) + 
      geom_boxplot(alpha = 1, outlier.shape = NA, outlier.size = 0, 
                   size = 0.7, width = 0.5, fill = "transparent") + 
      labs(x = "Groups", y = paste(index, "index"), color = groupID) + 
      theme_classic() + geom_text(data = df, aes(x = group, 
                                                 y = y, label = stat), color = "black", alpha = 0.1, size = 2.4, lineheight = 0.5) + geom_jitter(position = position_jitter(0.17), 
                                                                                                                                               size = 1, alpha = 0.7) + theme(text = element_text(family = "sans", 
                                                                                                                                                                                                  size = 7)) +
      scale_color_manual(values = colors)  # 添加这一行来指定颜色
    p
  }
}
#是的,当数据只有两个组时,仍然可以进行单因素方差分析。
#然而,当只有两个组时,通常会选择使用t检验(t-test)而不是方差分析,因为t检验在这种情况下的统计效力更高。
#但是,如果选择使用这个函数,当组的数量为2时,它会自动使用LSD.test进行最小显著差异(Least Significant Difference)检验,而不是Tukey HSD检验。
#这是因为Tukey HSD检验通常用于比较三个或更多组之间的差异,而LSD.test则适用于两个组的比较。
#所以,即使组只有两个,仍然可以使用这个函数来进行分析
#suppressWarnings(suppressMessages(library(amplicon)))
metadata = read.table

metadata = read.table("metadata.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F)
head(metadata, n = 3)
alpha_div = read.table("vegan.txt", header=T, row.names=1, sep="\t", comment.char="")
head(alpha_div, n = 3)
p = alpha_boxplot_custom_color(alpha_div, index = "simpson", metadata, groupID = "Group")

ggsave(paste0("alpha_boxplot_simpson.pdf"), p, width=89, height=56, units="mm")
ggsave(paste0("8.png"), p, width=89, height=56, units="mm")