如何实现读取多个文件夹下的 fasta 文件?
以下这一串文件分别存于 FANCE 和 FANCD 的文件夹中。但下面的代码必须手动敲出所有文件名才能选取,实在敲不动了!求各路大神、高人指点!小女子就此谢过!
# Hand-typed names of the protein sequence files to compare:
# one FANCE entry followed by the FANCD2 entries.
proteinNames = [
    'FANCE1020_1080',
    'FANCD21_60',
    'FANCD260_120',
    'FANCD2120_180',
    'FANCD2180_240',
    'FANCD2240_300',
    'FANCD2300_360',
    'FANCD2360_420',
    'FANCD2420_480',
    'FANCD2480_540',
    'FANCD2540_600',
    'FANCD2600_660',
    'FANCD2660_720',
    'FANCD2720_780',
    'FANCD2780_840',
    'FANCD2840_900',
    'FANCD2900_960',
    'FANCD2960_1020',
    'FANCD21020_1080',
    'FANCD21080_1140',
    'FANCD21140_1200',
    'FANCD21200_1260',
    'FANCD21260_1320',
    'FANCD21320_1380',
    'FANCD21380_1440',
    'FANCD21440_1500',
    'FANCD21500_1566',
    'FANCD2_alignment',
]
谢!
谢!
谢!
各路大神!
import numpy as np
from matplotlib import pyplot as plt
import PPI as ppi
from sklearn import manifold
import xlrd
import xlwt
# Names of the Fanconi anemia protein sequence files; each name is later
# passed to ppi.getAllSequences().
# Per the original note, the FASTA files are expected in the ./PPIData
# sub-folder (presumably resolved inside the PPI module -- TODO confirm).
proteinNames = [
    'FANCE1020_1080',
    'FANCD21_60',
    'FANCD260_120',
    'FANCD2120_180',
    'FANCD2180_240',
    'FANCD2240_300',
    'FANCD2300_360',
    'FANCD2360_420',
    'FANCD2420_480',
    'FANCD2480_540',
    'FANCD2540_600',
    'FANCD2600_660',
    'FANCD2660_720',
    'FANCD2720_780',
    'FANCD2780_840',
    'FANCD2840_900',
    'FANCD2900_960',
    'FANCD2960_1020',
    'FANCD21020_1080',
    'FANCD21080_1140',
    'FANCD21140_1200',
    'FANCD21200_1260',
    'FANCD21260_1320',
    'FANCD21320_1380',
    'FANCD21380_1440',
    'FANCD21440_1500',
    'FANCD21500_1566',
    'FANCD2_alignment',
]
# Build the pairwise distance matrix between all protein files, where
# distance = 1 - ppi.scorePPITreesP(A, B).
#
# NOTE(review): the original paste had lost its indentation; the nesting
# below (inner loop over j, per-pair prints inside it, matrix printed once
# at the end) is the most plausible reconstruction -- confirm against the
# author's working copy.
n = len(proteinNames)
distM = np.zeros([n, n])  # distM[i, j] holds the distance between file i and file j
distV = []                # the same distances, flattened in row-major visiting order

# Load every file exactly once instead of re-reading file j on each inner
# iteration (n reads rather than n * n).
# Assumes ppi.getAllSequences is a plain read with no per-call side effects
# -- TODO confirm.
allProteins = [ppi.getAllSequences(name) for name in proteinNames]

for i, nameA in enumerate(proteinNames):
    proteinsA = allProteins[i]
    print(nameA, len(proteinsA))
    for j, nameB in enumerate(proteinNames):
        proteinsB = allProteins[j]
        print(nameB, len(proteinsB))
        # Score each pair once and reuse the value for both the distance and
        # the diagnostic print (the original computed it twice per pair).
        # Assumes scorePPITreesP is deterministic and does not mutate its
        # arguments -- TODO confirm.
        score = ppi.scorePPITreesP(proteinsA, proteinsB)
        dist = 1 - score
        distV.append(dist)
        distM[i, j] = dist
        print(nameA, nameB, dist)
        print(score)

print(distM)
print()