初学python,批量选取文件中某一部分内容,然后将内容逐条存于一个文件中

下面这个,是将一个大的fasta文件,分割成小的,程序运行顺利。

import sys, getopt
from Bio import SeqIO

# Number of sequence characters written per output line.
LINE_WIDTH = 60


def parse_args(argv):
    """Parse the command-line options.

    Recognised options: ``-i`` input FASTA file, ``-o`` output file prefix,
    ``-x`` number of records per output file, ``-h`` print usage and exit.

    Args:
        argv: Option list, normally ``sys.argv[1:]``.

    Returns:
        A tuple ``(input_file, prefix, split_number)``.

    Raises:
        SystemExit: After printing the usage text for ``-h``, or when
            ``-x`` is not a positive integer.
    """
    input_file = "FANCE_alignment.fasta"
    prefix = "split_"
    split_number = 1
    opts, _unused = getopt.getopt(argv, "hi:o:x:")
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            prefix = value
        elif op == "-x":
            split_number = int(value)
        elif op == "-h":
            print("Usage: python3 splitmethod.py -i input.fasta -o prefix -x split_number")
            print("default prefix = split_")
            print("default split_number = 1")
            sys.exit()
    # A zero or negative chunk size cannot produce a meaningful split.
    if split_number < 1:
        sys.exit("split_number (-x) must be a positive integer")
    return input_file, prefix, split_number


def read_fasta(path):
    """Read a FASTA file into a list of ``(header, sequence)`` tuples.

    The header keeps its leading ``>``; the sequence lines of a record are
    joined into one string. Blank lines are skipped.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    records = []
    with open(path, "r") as handle:
        for raw_line in handle:
            line = raw_line.rstrip()
            if not line:
                # Blank lines carry no data; indexing line[0] would fail.
                continue
            if line.startswith(">"):
                records.append((line, []))
            elif not records:
                raise ValueError(
                    "sequence data found before the first '>' header in " + path
                )
            else:
                records[-1][1].append(line)
    # Join once per record instead of growing a string line by line.
    return [(header, "".join(parts)) for header, parts in records]


def write_split_files(records, prefix, split_number):
    """Write *records* into files of at most *split_number* records each.

    Files are named ``<prefix><n>.fasta`` with ``n`` starting at 1. Each
    sequence is wrapped at ``LINE_WIDTH`` characters per line. No file is
    created when there are no records left to put in it.

    Returns:
        The list of file paths written, in order.
    """
    paths = []
    starts = range(0, len(records), split_number)
    for file_index, start in enumerate(starts, start=1):
        path = prefix + str(file_index) + ".fasta"
        with open(path, "w") as out:
            for header, seq in records[start:start + split_number]:
                out.write(header + "\n")
                # max(..., 1) keeps one (empty) line for an empty sequence.
                for pos in range(0, max(len(seq), 1), LINE_WIDTH):
                    out.write(seq[pos:pos + LINE_WIDTH] + "\n")
        paths.append(path)
    return paths


def main(argv=None):
    """Split one FASTA file into several smaller FASTA files.

    Args:
        argv: Option list; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    input_file, prefix, split_number = parse_args(argv)
    records = read_fasta(input_file)
    print("OK")
    write_split_files(records, prefix, split_number)


if __name__ == "__main__":
    main()

下面这个程序从上述分割得到的小fasta文件中选取特定位置的内容,并存入新的文件中。程序运行顺利。

from Bio import SeqIO

# Copy a fixed window of every record in split_1.fasta into
# selected_split_1.fasta, one ">id" line followed by one sequence line.
# Both files are opened exactly once and closed by the with-statement,
# even if parsing fails part-way through.
with open('split_1.fasta', 'r') as fin, open('selected_split_1.fasta', 'w') as fout:
    for seq_record in SeqIO.parse(fin, "fasta"):
        fout.write(">")
        fout.write(str(seq_record.id) + "\n")
        # Slice [421:480] is 0-based and half-open: indices 421..479,
        # i.e. 1-based positions 422-480 (59 bases).
        # NOTE(review): if 1-based 421-480 inclusive is intended, the
        # slice would need to be [420:480] - confirm.
        fout.write(str(seq_record.seq[421:480]) + "\n")

但是,我想把第二个程序的功能整合到第一个程序中,从而实现:先分割文件,再从分割得到的每个小文件中选取特定位置的内容,并将选取的内容存入同一个文件中。

weixin_44644738
Elsa镇魂女孩 # -*- coding:utf-8 -*- import os from Bio import SeqIO # root_dir为要读取文件的根目录 root_dir = r"C:\Users\2350586L\PycharmProjects\split\splitE" # 读取批量文件后要写入的文件 with open("FANCE1020_1080.fasta", "w") as f: # 依次读取根目录下的每一个文件 for file in os.listdir(root_dir): file_name = root_dir + "\\" + file filein = open(file_name, "r") # 按行读取每个文件中的内容 for seq_record in SeqIO.parse(file_name, "fasta"): f.write(">") f.write(str(seq_record.id) + "\n") f.write(str(seq_record.seq[481:540]) + "\n") #start 481 to end 540 base positions filein.close() print("FINISHED")
8 个月之前 回复
weixin_45179700
weixin_45179700 哇,这也算初学吗?
8 个月之前 回复

2个回答

# -*- coding:utf-8 -*-
import os
from Bio import SeqIO

# root_dir is the directory that holds the FASTA files to read
root_dir = r"C:\Users\2350586L\PycharmProjects\split\splitE"
# Single output file that collects the selected region of every record
with open("FANCE1020_1080.fasta", "w") as f:

    # Visit the directory entries in sorted (lexicographic) name order so
    # the record order in the output is reproducible between runs.
    for file in sorted(os.listdir(root_dir)):
        # os.path.join picks the right separator for the platform.
        file_name = os.path.join(root_dir, file)
        # SeqIO.parse opens the file by name itself, so no separate
        # handle is opened here.
        for seq_record in SeqIO.parse(file_name, "fasta"):
            f.write(">")
            f.write(str(seq_record.id) + "\n")
            # Slice [1020:1080] is 0-based and half-open: indices
            # 1020..1079, i.e. 1-based positions 1021-1080 (60 bases).
            f.write(str(seq_record.seq[1020:1080]) + "\n")

print("FINISHED")

from Bio import SeqIO

# For split__1.fasta through split__67.fasta, copy a fixed window of every
# record into the matching selected_split_<n>.fasta file.
for xx in range(1, 68):
    xn = "split__" + str(xx) + ".fasta"
    yn = "selected_split_" + str(xx) + ".fasta"

    # Each file is opened exactly once; the with-statement closes both
    # handles even if parsing fails.
    with open(xn, 'r') as fin, open(yn, 'w') as fout:
        for seq_record in SeqIO.parse(fin, "fasta"):
            fout.write(">")
            fout.write(str(seq_record.id) + "\n")
            # Slice [421:480] is 0-based and half-open: indices 421..479,
            # i.e. 1-based positions 422-480 (59 bases).
            fout.write(str(seq_record.seq[421:480]) + "\n")

Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问