将vcf文件转成孟德尔随机化分析格式

发布时间 2023-09-19 09:15:27作者: 消失的森林

https://gwas.mrcieu.ac.uk/datasets/ukb-b-7330/为例:
原始文件形如:

转换代码

library(vcfR)
getwd()
a_data = read.vcfR('../ukb-b-7330.vcf.gz')
str(a_data)
head(a_data$meta,12)

head(a_data@fix)
head(a_data@gt)

fix = as.data.frame(a_data@fix[,(1:5)])
gt = as.data.frame(a_data@gt[,2])

colnames(gt)= 'GT'

beta = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(1,dim(gt)[1]*5,5)])
se = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(2,dim(gt)[1]*5,5)])
eaf = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(4,dim(gt)[1]*5,5)])
pval = as.numeric(unlist(strsplit(as.character(gt$GT),split = ":"))[seq(3,dim(gt)[1]*5,5)])
mydata = data.frame(beta=beta,se=se,pval=pval,maf=eaf)
mydata = cbind(fix,mydata)
mydata$pval = 10^(-mydata$pval) #还原p值
mydata$N=40899

write.csv(mydata,'vcf2.csv')

输出结果: