weixin_44644738
Elsa镇魂女孩
2019-06-27 20:46
采纳率: 62.5%
浏览 603

初学python,批量选取文件中某一部分内容,然后将内容逐条存于一个文件中

下面这个,是将一个大的fasta文件,分割成小的,程序运行顺利。

import sys, getopt
from Bio import SeqIO

# Number of sequence characters written per output line.
LINE_WIDTH = 60


def _read_fasta(path):
    """Parse the FASTA file at *path* into parallel header/sequence lists.

    Returns a tuple ``(headers, sequences)``: ``headers[k]`` is the full
    ">" line of record k (trailing whitespace stripped) and ``sequences[k]``
    is that record's sequence lines joined into one string.

    Raises ValueError if sequence data appears before the first header.
    """
    headers = []
    chunks = []
    with open(path, "r") as handle:
        for raw in handle:
            line = raw.rstrip()
            if not line:
                # Blank lines carry no data; indexing line[0] would fail.
                continue
            if line.startswith(">"):
                headers.append(line)
                chunks.append([])
            elif not headers:
                raise ValueError(
                    "sequence data found before the first '>' header in " + path
                )
            else:
                chunks[-1].append(line)
    return headers, ["".join(parts) for parts in chunks]


def _write_fasta(path, headers, sequences):
    """Write the given records to *path*, wrapping sequences at LINE_WIDTH."""
    with open(path, "w") as out:
        for header, seq in zip(headers, sequences):
            out.write(header + "\n")
            # max(..., 1) keeps one (empty) sequence line for an empty record.
            for pos in range(0, max(len(seq), 1), LINE_WIDTH):
                out.write(seq[pos:pos + LINE_WIDTH] + "\n")


def main(argv=None):
    """Split one FASTA file into files holding at most ``-x`` records each.

    Options (parsed with getopt from *argv*, default ``sys.argv[1:]``):
      -i  input FASTA file      (default "FANCE_alignment.fasta")
      -o  output name prefix    (default "split_"); files are named
          <prefix>1.fasta, <prefix>2.fasta, ...
      -x  records per file      (default 1)
      -h  print usage and exit

    Raises ValueError if ``-x`` is smaller than 1.
    """
    if argv is None:
        argv = sys.argv[1:]
    opts, _ = getopt.getopt(argv, "hi:o:x:")

    per_file = 1
    input_file = "FANCE_alignment.fasta"
    prefix = "split_"
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            prefix = value
        elif op == "-x":
            per_file = int(value)
        elif op == "-h":
            print("Usage: python3 splitmethod.py -i input.fasta -o prefix -x split_number")
            print("default prefix = split_")
            print("default split_number = 1")
            sys.exit()

    if per_file < 1:
        raise ValueError("split_number (-x) must be at least 1")

    headers, sequences = _read_fasta(input_file)
    print("OK")

    # Stepping by per_file yields exactly ceil(n / per_file) files, so no
    # empty trailing file is created when n is a multiple of per_file.
    for index, start in enumerate(range(0, len(headers), per_file), 1):
        stop = start + per_file
        # Plain open() on a computed name; the prefix is never executed as code.
        _write_fasta(
            prefix + str(index) + ".fasta",
            headers[start:stop],
            sequences[start:stop],
        )


if __name__ == "__main__":
    main()

下面这个程序是将上述分割的小的fasta文件中的特定位置的内容选取,并存于新的文件中。程序运行顺利。

from Bio import SeqIO

# Copy one fixed window of every record in split_1.fasta into
# selected_split_1.fasta, one ">" id line plus one sequence line per record.
#
# The input is handed to SeqIO.parse by file name and the output is owned by
# the `with` block, so no separate open()/close() handles are needed (the
# extra write handle on the output file was never used).
with open("selected_split_1.fasta", "w") as f:
    for seq_record in SeqIO.parse("split_1.fasta", "fasta"):
        f.write(">")
        f.write(str(seq_record.id) + "\n")
        # The slice is 0-based and end-exclusive: [421:480] keeps indices
        # 421..479, i.e. 1-based positions 422..480 (at most 59 characters).
        f.write(str(seq_record.seq[421:480]) + "\n")

但是,我想知道如何把第二个程序的功能组合到第一个程序中,从而实现:先分割文件,再选取各个分割后的小文件中的特定内容,并将选取的内容存于一个文件中。

  • 点赞
  • 写回答
  • 关注问题
  • 收藏
  • 邀请回答

2条回答 默认 最新

  • weixin_44644738
    Elsa镇魂女孩 2019-07-01 23:10
    已采纳
    # -*- coding:utf-8 -*-
    import os
    from Bio import SeqIO
    
    # root_dir is the directory that holds the FASTA files to read.
    root_dir = r"C:\Users\2350586L\PycharmProjects\split\splitE"
    # Single output file that collects the selected window of every record.
    with open("FANCE1020_1080.fasta", "w") as f:

        # os.listdir returns names in arbitrary order; sorting makes the
        # order of records in the output reproducible.
        for file in sorted(os.listdir(root_dir)):
            file_name = os.path.join(root_dir, file)
            if not os.path.isfile(file_name):
                # Skip sub-directories; only regular files can be parsed.
                continue
            # SeqIO.parse is given the path directly, so no separate
            # open()/close() of the input file is needed.
            for seq_record in SeqIO.parse(file_name, "fasta"):
                f.write(">")
                f.write(str(seq_record.id) + "\n")
                # The slice is 0-based and end-exclusive: [1020:1080] keeps
                # indices 1020..1079, i.e. 1-based positions 1021..1080.
                f.write(str(seq_record.seq[1020:1080]) + "\n")

    print("FINISHED")
    
    
    点赞 评论
  • caozhy

    from Bio import SeqIO

    # For each numbered split file (1..67), write the selected window of
    # every record to a matching "selected_split_<n>.fasta" file.
    for xx in range(1, 68):
        xn = "split__" + str(xx) + ".fasta"
        yn = "selected_split_" + str(xx) + ".fasta"

        # The `with` block owns the output file and SeqIO.parse is given the
        # input by name, so no extra open()/close() handles are needed.
        with open(yn, "w") as f:
            for seq_record in SeqIO.parse(xn, "fasta"):
                f.write(">")
                f.write(str(seq_record.id) + "\n")
                # The slice is 0-based and end-exclusive: [421:480] keeps
                # indices 421..479, i.e. 1-based positions 422..480.
                f.write(str(seq_record.seq[421:480]) + "\n")

    点赞 评论

相关推荐