> #!/usr/bin/env Rscript > # 批量 maSigPro:Control / Low / High 三组分别跑(标准化 log2CPM)...
🚨 错误信息
> #!/usr/bin/env Rscript
> # 批量 maSigPro:Control / Low / High 三组分别跑(标准化 log2CPM)
> # 工作目录:D:/ANorth/muilt/呼吸多组学-陈宵/Analysis-lym/1.单组学分析/转录组/4时序分析
>
> library(tidyverse)
> library(maSigPro)
> library(readr)
>
> setwd("D:/ANorth/muilt/呼吸多组学-陈宵/Analysis-lym/1.单组学分析/转录组/4时序分析")
> dir.create("maSigPro_results", showWarnings = FALSE)
>
> groups <- c("Control", "Low", "High")
>
> for (g in groups) {
+ cat("\n====== running", g, "======\n")
+
+ ## 1 读入并对齐 ------------------------------------------------------------
+ mat <- read_csv(str_c("geneMatrix_", g, "_normalized_log2cpm.csv")) %>%
+ column_to_rownames("symble") %>% as.matrix()
+ meta <- read_csv(str_c("meta_", g, ".csv")) %>% arrange(sample)
+ mat <- mat[, meta$sample] # 严格对齐列顺序
+
+ ## 2 design ≥2 列,replicate 为因子
+ design <- data.frame(
+ time = meta$time,
+ replicate = factor(meta$replicate)
+ )
+ rownames(design) <- meta$sample
+
+ ## 3 maSigPro 两步回归 -----------------------------------------------------
+ fit <- p.vector(mat, design, Q = 0.05, family = "gaussian")
+ tfit <- T.fit(fit,
+ step.method = "two.ways.backward",
+ alfa = 0.05,
+ adjust = "fdr") # 使用 fdr 校正
+ sig <- get.siggenes(tfit, rsq = 0.6)
+
+ ## 4 写出结果 --------------------------------------------------------------
+ prefix <- str_c("maSigPro_results/", g)
+
+ write_csv(sig$sig.genes %>% as_tibble(rownames = "gene"),
+ str_c(prefix, "_sig_genes.csv"))
+ write_csv(tfit$sig.models %>% as_tibble(rownames = "gene"),
+ str_c(prefix, "_coef.csv"))
+
+ ## 5 趋势图
+ pdf(str_c(prefix, "_trends.pdf"), width = 8, height = 5)
+ plot(tfit, name = g, clustering = TRUE)
+ dev.off()
+
+ ## 6 可选:持续上调/下调基因
+ coef <- tfit$sig.models
+ up <- rownames(coef)[coef$time > 0 & coef$time < 0.05 & is.na(coef$time2)]
+ down <- rownames(coef)[coef$time < 0 & coef$time < 0.05 & is.na(coef$time2)]
+ write_csv(tibble(gene = up), str_c(prefix, "_up_genes.csv"))
+ write_csv(tibble(gene = down), str_c(prefix, "_down_genes.csv"))
+ }
====== running Control ======
Rows: 13692 Columns: 13
── Column specification ──────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): symble
dbl (12): 6hC1, 6hC2, 6hC3, 24hC1, 24hC2, 24hC3, 72hC1, 72hC2, 72hC3, 120hC1, 120hC2, 120hC3
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 12 Columns: 4
── Column specification ──────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): sample, group
dbl (2): time, replicate
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
错误于family$family: $ operator is invalid for atomic vectors
🤖 AI解决方案
错误于family$family: $ operator is invalid for atomic vectors
Error in family$family: $ operator is invalid for atomic vectors
原因分析
maSigPro::p.vector 函数的 family 参数期望接收一个表示模型族(如 "gaussian", "binomial" 等)的字符串,或者一个 family 对象(如 gaussian())。您传递的 family = "gaussian" 是一个字符串,但 R 内部将其解释为一个原子向量,而不是一个期望的列表或对象,导致在尝试访问 $family 时出错。
解决方案
将 family = "gaussian" 修改为 family = gaussian()。gaussian() 是 R 中用于指定高斯(正态)分布族的一个函数,它会返回一个正确的 family 对象。
# ... (前面的代码保持不变)
## 3 maSigPro 两步回归 -----------------------------------------------------
# 将 family = "gaussian" 修改为 family = gaussian()
fit <- p.vector(mat, design, Q = 0.05, family = gaussian())
tfit <- T.fit(fit,
step.method = "two.ways.backward",
alfa = 0.05,
adjust = "fdr") # 使用 fdr 校正
sig <- get.siggenes(tfit, rsq = 0.6)
# ... (后面的代码保持不变)
预防措施
- 查阅函数文档:在使用新的 R 包或函数时,务必仔细阅读其文档(例如,通过在 R 控制台中输入
?p.vector来查看p.vector函数的帮助文档),了解参数的期望类型和格式。 - 参数类型检查:在遇到类似错误时,可以先尝试打印传递给函数的参数,检查其类型和结构是否符合预期。例如,在
p.vector调用前,可以print(class(gaussian()))来确认gaussian()返回的是一个正确的family对象。