2021-07-22 17:15

# 合并坐标，考虑二元素

``````perl
use strict;
use warnings;
# Merge overlapping [min, max] coordinate ranges per class and print one
# record per merged range.
#
# Every input line is split on tabs and semicolons into positional fields:
# fields 4 and 5 are the range bounds and field 11 is the class used as the
# grouping key. For each class the remaining fields of the FIRST line seen
# are reused for all of that class's output records; the merged ranges are
# labelled "<class>_E<n>" with n counting from 1.

my @name_array;    # first record seen for each class, in input order
my %seen_class;    # classes already stored in @name_array (O(1) lookup)
my $hash = {};     # class => array ref of merged [min, max] pairs

my $gff_path = 'trf.gff';
open(my $gff_fh, '<', $gff_path) or die "Cannot open $gff_path: $!";

while (my $line = <$gff_fh>) {
    # Strip the newline so it cannot end up inside the last captured field
    # and produce a doubled line break in the output.
    chomp $line;

    # The second and third fields are deliberately not named $a/$b: those
    # are sort's special variables and should not be shadowed by lexicals.
    my ($scaff, $col2, $col3, $min, $max, $c, $d, $e, $f, $g, $class, $h, $i, $j)
        = split /\t|;/, $line;

    # A line too short to carry a class (blank line, header, truncated
    # record) cannot be grouped, so it is skipped.
    next unless defined $class;

    # Remember the descriptive fields of the first line of each class.
    unless ($seen_class{$class}++) {
        push @name_array,
            [$scaff, $col2, $col3, $c, $d, $e, $f, $g, $class, $h, $i, $j];
    }

    # Normalise the bounds so that min <= max.
    ($min, $max) = ($max, $min) if $min > $max;

    if ($hash->{$class}) {
        sort_matrix($min, $max, $hash->{$class});
    }
    else {
        push @{ $hash->{$class} }, [$min, $max];
    }
}
close $gff_fh;

# Print the merged ranges, one line per range, space separated.
for my $item (@name_array) {
    my $class  = $item->[8];
    my $ranges = $hash->{$class};
    for my $idx (0 .. $#{$ranges}) {
        my ($range_min, $range_max) = @{ $ranges->[$idx] };
        print join(' ',
            @{$item}[0 .. 2],
            $range_min, $range_max,
            @{$item}[3 .. 7],
            $class . '_E' . ($idx + 1),
            # Trailing fields may be missing on short lines; print them empty.
            map { defined $_ ? $_ : '' } @{$item}[9 .. 11]
        ), "\n";
    }
}

# sort_matrix($cur_min, $cur_max, $matrix)
#
# Merge the range [$cur_min, $cur_max] into @$matrix, an array reference of
# [min, max] pairs, modifying it in place.
#
#   $cur_min, $cur_max  bounds of the new range (caller passes min <= max)
#   $matrix             array ref of [min, max] array refs
#
# Behaviour:
#   * If the new range lies entirely inside an existing entry, the matrix is
#     left untouched.
#   * Otherwise every entry that overlaps the new range (sharing an endpoint
#     counts as overlapping) is removed and a single entry covering all of
#     them plus the new range is appended at the end.
#   * Entries that do not overlap keep their relative order.
#
# Returns nothing.
sub sort_matrix {
    my ($cur_min, $cur_max, $matrix) = @_;

    my @kept;    # entries that do not overlap the (growing) new range
    for my $entry (@$matrix) {
        my ($item_min, $item_max) = @$entry;

        # No overlap: keep the entry as it is.
        if ($cur_max < $item_min or $cur_min > $item_max) {
            push @kept, $entry;
            next;
        }

        # Already covered by an existing entry: nothing to change.
        return if $cur_min >= $item_min and $cur_max <= $item_max;

        # Partial overlap: absorb the entry into the new range and drop it.
        $cur_min = $item_min if $item_min < $cur_min;
        $cur_max = $item_max if $item_max > $cur_max;
    }

    # Rebuild in place so the caller's reference sees the result.
    @$matrix = (@kept, [$cur_min, $cur_max]);
    return;
}

``````
1条回答

• sanzhong104204 2021-07-23 07:57

坐标是啥，染色体又是啥？两种重复序列是什么意思？另外，只考虑perl吗？其他语言实现ok不？

