下面这个程序将一个大的 FASTA 文件分割成多个小文件,运行正常。
import sys, getopt
from Bio import SeqIO
# Split a multi-record FASTA file into numbered output files that each hold
# at most `split_number` records: <prefix>1.fasta, <prefix>2.fasta, ...

LINE_WIDTH = 60  # residues written per sequence line in the output files
USAGE = "Usage: python3 splitmethod.py -i input.fasta -o prefix -x split_number"


def parse_options(argv):
    """Parse command-line options and return ``(input_file, prefix, X)``.

    Args:
        argv: Argument list without the program name (``sys.argv[1:]``).

    Returns:
        A tuple of the input FASTA path (``-i``), the output file name
        prefix (``-o``) and the number of records per output file (``-x``).
        Options that are not given keep their defaults.

    Raises:
        SystemExit: After printing the help text for ``-h``, or with an
            error message for an unknown option, a non-integer ``-x`` value
            or a ``-x`` value below 1.
    """
    input_file = "FANCE_alignment.fasta"
    prefix = "split_"
    X = 1
    try:
        opts, _ = getopt.getopt(argv, "hi:o:x:")
    except getopt.GetoptError as err:
        sys.exit("{}\n{}".format(err, USAGE))
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            prefix = value
        elif op == "-x":
            try:
                X = int(value)
            except ValueError:
                sys.exit("split_number must be an integer, got {!r}".format(value))
        elif op == "-h":
            print(USAGE)
            print("default prefix = split_")
            print("default split_number = 1")
            sys.exit()
    # A chunk size of 0 would divide by zero and a negative one would
    # silently produce nothing, so reject both up front.
    if X < 1:
        sys.exit("split_number must be at least 1, got {}".format(X))
    return input_file, prefix, X


def read_fasta(path):
    """Read a FASTA file into parallel lists of headers and sequences.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        ``(fa_Info, fa_Seq)`` where ``fa_Info[k]`` is the k-th header line
        (including the leading ``>``) and ``fa_Seq[k]`` is that record's
        sequence with all of its lines joined into one string.

    Raises:
        ValueError: If sequence text appears before the first header.
    """
    fa_Info = []
    seq_parts = []  # one list of line fragments per record, joined at the end
    with open(path, "r") as handle:
        for raw in handle:
            line = raw.rstrip()
            if not line:
                # Blank lines (a trailing one is common) carry no data.
                continue
            if line.startswith(">"):
                fa_Info.append(line)
                seq_parts.append([])
            elif seq_parts:
                seq_parts[-1].append(line)
            else:
                raise ValueError(
                    "{}: sequence data found before the first '>' header".format(path)
                )
    return fa_Info, ["".join(parts) for parts in seq_parts]


def write_splits(fa_Info, fa_Seq, X, prefix):
    """Write the records to numbered files of at most ``X`` records each.

    Files are named ``<prefix><n>.fasta`` with ``n`` starting at 1. Each
    sequence is wrapped at ``LINE_WIDTH`` characters per line; a record with
    an empty sequence is written as its header followed by an empty line.

    Args:
        fa_Info: Header lines, one per record.
        fa_Seq: Sequences, parallel to ``fa_Info``.
        X: Maximum number of records per output file (must be >= 1).
        prefix: Output file name prefix; may include a directory part.

    Returns:
        The list of file names written, in order. No file is created when
        there are no records.
    """
    written = []
    # Stepping by X yields exactly ceil(n / X) chunks, so no empty trailing
    # file is produced when the record count is a multiple of X.
    for file_no, start in enumerate(range(0, len(fa_Info), X), 1):
        out_name = prefix + str(file_no) + ".fasta"
        with open(out_name, "w") as out:
            chunk = zip(fa_Info[start:start + X], fa_Seq[start:start + X])
            for header, seq in chunk:
                out.write(header + "\n")
                wrapped = [
                    seq[pos:pos + LINE_WIDTH]
                    for pos in range(0, len(seq), LINE_WIDTH)
                ] or [""]
                out.write("\n".join(wrapped) + "\n")
        written.append(out_name)
    return written


def main(argv=None):
    """Run the splitter using ``argv`` (defaults to ``sys.argv[1:]``)."""
    input_file, prefix, X = parse_options(sys.argv[1:] if argv is None else argv)
    fa_Info, fa_Seq = read_fasta(input_file)
    print("OK")
    write_splits(fa_Info, fa_Seq, X, prefix)


if __name__ == "__main__":
    main()
下面这个程序从上述分割得到的小 FASTA 文件中选取每条序列特定位置的片段,并保存到新的文件中,运行正常。
from Bio import SeqIO
# Copy a fixed window of every record in split_1.fasta into
# selected_split_1.fasta, one FASTA record (">id" line + window) per input
# record.
#
# The input is read by SeqIO.parse and the output is written through a single
# handle managed by `with`, so no separate open()/close() calls are needed.
#
# NOTE(review): the slice below is 0-based and end-exclusive, so [421:480]
# yields 59 residues covering 1-based positions 422-480. If 1-based positions
# 421-480 (60 residues) are intended, the start index should be 420 - confirm.
with open("selected_split_1.fasta", "w") as f:
    for seq_record in SeqIO.parse("split_1.fasta", "fasta"):
        f.write(">" + str(seq_record.id) + "\n")
        f.write(str(seq_record.seq[421:480]) + "\n")
但是,我想知道如何把第二个程序的功能整合到第一个程序中,从而先分割文件,再从分割得到的小文件中选取特定位置的内容,并将选取的内容保存到一个文件中。