import numpy as np
import scipy.stats as stats
#from sklearn.metrics import cohen_kappa_score
import sys
import os
impute = sys.argv[1]
wgs = sys.argv[2]
def cal_dr2():
os.system('bcftools query -f "%CHROM\t%POS[\t%DS]\n" 0002.vcf >3.vcf')
os.system('bcftools query -f "%CHROM\t%POS[\t%GT]\n" 0003.vcf >4.vcf')
os.system("grep -v '^#' 3.vcf |cut -f 3- >1.txt")
os.system("grep -v '^#' 4.vcf |cut -f 3- >2.txt")
with open('2.txt', 'r') as f2:
str2 = f2.read()
a2 = str2.replace('0|0','0').replace('0|1','1').replace('1|1','2').replace('1|0','1').replace('0/0','0').replace('0/1','1').replace('1/1','2').replace('1/0','1')
with open('4.txt', 'a') as f4:
f4.write(a2)
a = np.loadtxt('1.txt')
b = np.loadtxt('4.txt')
DR2 = {}
for i in range(0, a.shape[0]):
dr2 = stats.pearsonr(a[i], b[i])
DR2['{}'.format(i+1)] = dr2[0]
os.system("rm -r 1.vcf 2.vcf 3.vcf 4.vcf 1.txt 2.txt 3.txt 4.txt")
return DR2
def cal_r2_IQS():
os.system('bcftools query -f "%CHROM\t%POS[\t%GT]\n" 0002.vcf >3.vcf')
os.system('bcftools query -f "%CHROM\t%POS[\t%GT]\n" 0003.vcf >4.vcf')
os.system("grep -v '^#' 3.vcf |cut -f 3- >1.txt")
os.system("grep -v '^#' 4.vcf |cut -f 3- >2.txt")
with open('1.txt', 'r') as f2:
str2 = f2.read()
a2 = str2.replace('0|0','0').replace('0|1','1').replace('1|1','2').replace('1|0','1').replace('0/0','0').replace('0/1','1').replace('1/1','2').replace('1/0','1')
with open('3.txt', 'a') as f4:
f4.write(a2)
with open('2.txt', 'r') as f2:
str2 = f2.read()
a2 = str2.replace('0|0','0').replace('0|1','1').replace('1|1','2').replace('1|0','1').replace('0/0','0').replace('0/1','1').replace('1/1','2').replace('1/0','1')
with open('4.txt', 'a') as f4:
f4.write(a2)
GC = {}
R2 = {}
IQS = {}
a = np.loadtxt('3.txt')
b = np.loadtxt('4.txt')
for i in range(0, a.shape[0]):
c = a[i] == b[i]
num = str(c.tolist()).count("True")
gc = str(float(num) / len(c))
r2 = stats.pearsonr(a[i], b[i])
#iqs = cohen_kappa_score(a[i], b[i])
GC['{}'.format(i + 1)] = gc
R2['{}'.format(i + 1)] = r2[0]
#IQS['{}'.format(i + 1)] = iqs
os.system("rm -r 1.vcf 2.vcf 3.vcf 4.vcf 1.txt 2.txt 3.txt 4.txt")
return GC, R2
os.system('bcftools isec {} {} -p accuracy'.format(impute, wgs))
os.chdir('accuracy')
DR2 = cal_dr2()
GC, R2 = cal_r2_IQS()
#os.system("rm -r 0000.vcf 0001.vcf 0002.vcf 0003.vcf")
with open('result.txt', 'a') as f:
f.write('ID' + '\t' + 'GC' + '\t' + 'R2' + '\t' + 'DR2' + '\n')
for id in GC.keys():
f.write(id + '\t' + str(GC[id]) + '\t' + str(R2[id]) + '\t' + str(DR2[id]) + '\n')
请问怎么上面的代码修改下,想要计算Pearson相关系数的平方( R2 )