批量生成用于AF2预测的mCherry+split intein输入序列

发布时间: 2024-01-08 14:06:23 作者: 计算之道

所需文件列表:

文件介绍:

(1)Junction_seqences.dat为连接序列,其格式如下:

 (2)Seq_of_IntN.dat为Intein的N端序列,其格式如下:

 (3)Seq_of_IntC.dat为Intein的C端序列,其格式如下:

  (4)./seq_for_predict/ 为输出文件存放的文件夹

 (5)gene_seq.py脚本的内容如下:

mrc@mrc-Precision-3660:~/project/alphafold2/alphafold2_Vconda/prediction_Split_Int$ cat  gene_seq.py
#Python3
#maoruichao@2024.1.8
#Usage: python3 gene_seq.py
"""Generate two-chain FASTA inputs for every IntN x IntC split-intein pair.

Each output file holds two records:
  * ExN_seq + N-side linker + IntN sequence
  * IntC sequence + C-side linker + ExC_seq

Input formats, as read by the parsing code below:
  * Intein files: a header line ending in ':' followed by one sequence line.
    The text before the last '-' of the header is the key used to look up
    the linker (the whole header name is used when there is no '-').
  * Linker file: ``name:N_linker/C_linker`` per line.
  * In all files, blank lines and lines starting with '#' are ignored.
"""

import os

## Extein sequences placed before IntN (ExN) and after IntC (ExC)
ExN_seq = 'MVSKGEEDNMAIIKEFMRFKVHMEGSVNGHEFEIEGEGEGRPYEGTQTAKLKVTKGGPLPFAWDILSPQFMYGSKAYVKHPADIPDYLKLSFPEGFKWERVMNFEDGGVVTVTQDSSLQDGEFIYKVKLRGTNFPSDGPVMQKKTMGWEASSERMYPED'
ExC_seq = 'GALKGEIKQRLKLKDGGHYDAEVKTTYKAKKPVQLPGAYNVNIKLDITSHNEDYTIVEQYERAEGRHSTGGMDELYK'


def _read_clean_lines(path):
    """Return the stripped lines of *path*, without blank and '#' lines."""
    with open(path, 'r') as handle:
        stripped = (raw.strip() for raw in handle)
        # Strip first, then filter, so whitespace-only lines and indented
        # comments are dropped as well.
        return [line for line in stripped if line and not line.startswith('#')]


def _parse_inteins(path):
    """Return ``[(name, linker_key, sequence), ...]`` in file order.

    Raises:
        ValueError: if a header line is not followed by a sequence line.
    """
    lines = _read_clean_lines(path)
    records = []
    for pos, line in enumerate(lines):
        if not line.endswith(':'):
            continue
        # Use the header's own position: searching by value would return the
        # first occurrence and mis-assign sequences of duplicated headers.
        if pos + 1 >= len(lines) or lines[pos + 1].endswith(':'):
            raise ValueError(
                '{}: header {!r} is not followed by a sequence line'.format(path, line))
        name = line[:-1]
        dash = line.rfind('-')
        # Without a '-' there is no suffix to cut off, so the whole name is
        # the lookup key (instead of a stale value from a previous header).
        linker_key = line[:dash] if dash != -1 else name
        records.append((name, linker_key, lines[pos + 1]))
    return records


def _parse_linkers(path):
    """Return ``{name: [N_linker, C_linker, ...]}``; later lines win."""
    linkers = {}
    for line in _read_clean_lines(path):
        fields = line.split(':')
        if len(fields) < 2:
            # No ':' means there is no linker part to use for any intein.
            continue
        linkers[fields[0]] = fields[1].split('/')
    return linkers


def _linker_for(linkers, linker_key, side, intein_name):
    """Return the linker for *linker_key*; side 0 is N-side, side 1 is C-side.

    Raises:
        ValueError: if no linker is defined for that key and side.
    """
    parts = linkers.get(linker_key)
    if parts is None or len(parts) <= side:
        raise ValueError(
            'no {}-side linker named {!r} found for intein {!r}'.format(
                'NC'[side], linker_key, intein_name))
    return parts[side]


def main(intn_path='Seq_of_IntN.dat', linker_path='Junction_seqences.dat',
         intc_path='Seq_of_IntC.dat', out_dir='./seq_for_predict/'):
    """Write one FASTA file per (IntN, IntC) combination into *out_dir*.

    Args:
        intn_path: file with the N-terminal intein sequences.
        linker_path: file with the junction (linker) sequences.
        intc_path: file with the C-terminal intein sequences.
        out_dir: output folder; created when it does not exist.

    Raises:
        ValueError: if an intein has no sequence line or no matching linker.
            All inputs are validated before the first file is written.
    """
    linkers = _parse_linkers(linker_path)

    # Assemble every chain up front: the C-side chains do not depend on the
    # IntN being processed, and a bad input then fails before any output.
    n_chains = [(name, ExN_seq + _linker_for(linkers, key, 0, name) + seq)
                for name, key, seq in _parse_inteins(intn_path)]
    c_chains = [(name, seq + _linker_for(linkers, key, 1, name) + ExC_seq)
                for name, key, seq in _parse_inteins(intc_path)]

    os.makedirs(out_dir, exist_ok=True)

    for num_row, (intN_name, All_seqN) in enumerate(n_chains, start=1):
        for num_column, (intC_name, All_seqC) in enumerate(c_chains, start=1):
            # Progress output
            print(num_row, num_column, intN_name, intC_name, All_seqN, All_seqC)

            # NOTE(review): there is no separator between the column index and
            # the IntN name; kept as-is so existing file names do not change.
            basename = (format(num_row, '02d') + '_' + format(num_column, '02d')
                        + intN_name + '+' + intC_name + '.fasta')
            filename = os.path.join(out_dir, basename)
            title1 = '>' + intN_name + '|Ex_N+linker+Int_N'
            title2 = '>' + intC_name + '|Int_C+linker+Ex_C'
            with open(filename, 'w') as newfile:
                newfile.write(title1 + '\n')
                newfile.write(All_seqN + '\n')
                newfile.write(title2 + '\n')
                newfile.write(All_seqC + '\n')


if __name__ == '__main__':
    main()