verilog使用两个for循环嵌套的代码,可以仿真而且计算结果正确,综合的时候却核心模块缺失。
已经尝试过减少一个for循环,还是无法综合。而且不报错,查vivado的log报告也找不到问题。
自己核对了没有什么硬件不能实现的语言。
值得一提的是,在这之前写了单个for循环的代码(是我把纯流水线的代码改为CYCLE迭代复用形式的前身),这个代码是完全可以综合的,算法内容完全一致。
还请各位帮忙看下或者提供些经验性建议……感恩
// FOR LATENCY == CMAX, high latency case
module sqrt_binary_high_v1_0 #(parameter SIZE = 8'd30, HALF_SIZE = 8'd15, LATENCY_CONFIG = 15, CYCLE_CONFIG_MAX = 15, CYCLE = 4) (
input i_clk,
input i_aclken,
input i_rst_n,
input [SIZE-1 : 0] i_p,
output [HALF_SIZE - 1:0] o_u
);
localparam REMAINDER = CYCLE_CONFIG_MAX % CYCLE; // r1
localparam FOR = (CYCLE_CONFIG_MAX % CYCLE == 0) ? (CYCLE_CONFIG_MAX / CYCLE) : (CYCLE_CONFIG_MAX / CYCLE + 1); // q1
localparam REMAIN_LATENCY = CYCLE_CONFIG_MAX - (CYCLE - 1);
wire [HALF_SIZE-1:0] u [HALF_SIZE-1:0];
wire [SIZE-1:0] r_cycle [CYCLE_CONFIG_MAX-1:0];
wire [SIZE-1:0] p_cycle [CYCLE_CONFIG_MAX-1:0];
wire [HALF_SIZE-1:0] u_cycle [CYCLE_CONFIG_MAX-1:0];
assign u[0][HALF_SIZE-1] = (i_p[SIZE-1:SIZE-2] >= 2'b01);
assign p_cycle[0][SIZE-1:SIZE-2] = i_p[SIZE-1:SIZE-2] - 2'b01;
assign p_cycle[0][SIZE-3:0] = i_p[SIZE-3:0];
assign u_cycle[0][HALF_SIZE-1] = (i_p[SIZE-1:SIZE-2] >= 2'b01);
generate
genvar i, j;
for (i = 0; i < CYCLE; i = 1 + i) begin: loop_cycle
for (j = 0; j < FOR; j = 1 + j) begin: loop_logic
if((i*FOR+j)<= CYCLE_CONFIG_MAX-2)begin
ipsxe_floating_point_register_v1_0 #(SIZE + HALF_SIZE) u_register(
.i_clk(i_clk),
.i_aclken(i_aclken),
.i_rst_n(i_rst_n),
.i_d({p_cycle[i*FOR+j], u_cycle[i*FOR+j] }),
.o_q({r_cycle[i*FOR+j], u_cycle[i*FOR+j+1]})
);
assign p_cycle[i*FOR+j+1][(SIZE-1-(i*FOR+j+2)*2+1)+:(i*FOR+j+4)] = u_cycle[i*FOR+j+1][HALF_SIZE-(i*FOR+j+1)] ? (r_cycle[i*FOR+j][SIZE-(i*FOR+j+1)-:(i*FOR+j+4)] - {1'b0, u_cycle[i*FOR+j+1][HALF_SIZE-1-:(i*FOR+j+1)], 2'b01}) : (r_cycle[i*FOR+j][SIZE-(i*FOR+j+1)-:(i*FOR+j+4)] + {1'b0, u_cycle[i*FOR+j+1][HALF_SIZE-1-:(i*FOR+j+1)], 2'b11});
if (i*FOR+j < HALF_SIZE - 2)begin
assign p_cycle[i*FOR+j+1][SIZE-1-(i*FOR+j+2)*2:0] = r_cycle[i*FOR+j][SIZE-1-(i*FOR+j+2)*2:0];
end
assign u_cycle[i*FOR+j+1][HALF_SIZE-1-(i*FOR+j+1)] = ~p_cycle[i*FOR+j+1][SIZE-(i*FOR+j+1)];
assign u[i*FOR+j] = u_cycle[i*FOR+j];
end
else begin
end
end
end
if(REMAIN_LATENCY != 0)begin
ipsxe_floating_point_distributed_shiftregister_wrapper_v1_0 #(REMAIN_LATENCY,HALF_SIZE) u_sr_output(
.din(u[HALF_SIZE-1]),
.clk(i_clk),
.i_aclken(i_aclken),
.rst(~i_areset_n),
.dout(o_u)
);
end
else begin
assign o_u = u[HALF_SIZE-1];
end
endgenerate
endmodule