从文本文件A14s.txt读入一个字符串,进行行程码压缩后向文本文件A14t.txt写入压缩后的十六进制串。行程码(Run-Length Encoding)是一种统计编码,适用于经常出现连续符号的数据压缩,编码规则为:对2-8个连续出现的相同字符,用一个计数值和该字符来代替。超过8个相同字符时,每次截取8个进行编码。出现单独字符时,以计数值1开头,再列举连续的单独字符,在出现第一个连续相同字符之前以计数值1结尾。输出编码结果时,计数值减1后以3位二进制数表示,字符以8位ASCII表示。例如3a6*2d表示为010 0110 0001 101 0010 1010 001 0110 0100,其中3位一组的010、101、001为计数值(原文以下划线标出)。最后按字节输出,即01001100, 00110100, 10101000, 10110010, 00000000(不足一个字节时末尾补0),十六进制形式为4c34a8b200。
下面这段C语言代码在实现上述要求时,在大多数情况下结果正确,却在个别情况下输出错误结果。这是为什么?
#include <stdio.h>
#include <stdlib.h>
/*
 * Convert four binary digits (n[0] is the most significant) into the
 * matching lowercase hexadecimal character and return its character code.
 */
int b2x(int* n) {
    int value = 0;

    /* Fold the digits MSB-first: ((n0*2 + n1)*2 + n2)*2 + n3. */
    for (int k = 0; k < 4; k++) {
        value = value * 2 + n[k];
    }

    /* 0..9 map onto '0'..'9', everything above onto 'a'.. */
    char digit = (char)(value <= 9 ? value + '0' : value + ('a' - 10));
    return digit;
}
/*
 * Write the lowest `length` bits of `n` into nums[0..length-1] as 0/1
 * values, most significant bit first. Nothing is written when length <= 0.
 */
void num2b(int n, int length, int* nums) {
    int *out = nums;
    int shift = length - 1;

    while (shift >= 0) {
        *out = (n >> shift) & 1;
        out++;
        shift--;
    }
}
/* One run of identical characters: the character and how often it repeats. */
struct num_chara {
int num;   /* number of consecutive occurrences in the run */
int chara; /* character code of the repeated character */
};
int main() {
FILE *fps, *fpt;
fps = fopen("A14s.txt", "r");
fpt = fopen("A14t.txt", "w");
long size;
fseek(fps, 0, SEEK_END);
size = ftell(fps);
rewind(fps);
char *text = (char*)malloc(size+1);
fgets(text, size+1, fps);
fclose(fps);
struct num_chara charas[size];
int x = 0, count = 1;
for (int i = 0; i < size; i++) {
if (text[i] != text[i+1]) {
if (count == 1) {
charas[x].num = 1;
charas[x].chara = text[i];
}
else {
charas[x].num = count;
charas[x].chara = text[i];
count = 1;
}
x += 1;
}
else {
count += 1;
}
}
int single = 0, signals[x]; // 1 -> start 1, 2 -> end 1, 3 -> start & end 1, xy -> over 8
for (int i = 0; i < x; i++) {
if (charas[i].num != 1) {
if (single == 1) {
signals[i-1] += 2;
single = 0;
}
if (charas[i].num > 8) {
int s = signals[i] / 8;
int r = signals[i] - 8 * s;
signals[i] = 10 * s + r;
}
else {
signals[i] = 0;
}
}
else if (single == 0) {
if (i == x - 1) {
signals[i] = 3;
}
else {
signals[i] = 1;
single = 1;
}
}
else {
signals[i] = 0;
}
}
int length = 11 * x;
single = 0;
for (int i = 0; i < x; i++) {
if (signals[i] == 3) {
length += 3;
}
else if (signals[i] == 1) {
single = 1;
}
else if (signals[i] == 2) {
single = 0;
}
else if (signals[i] != 0) {
int a = signals[i] / 10;
if (signals[i] == 10 * a) {
length += 11 * (a - 1);
}
else {
length += 11 * a;
}
}
else if (single == 1) {
length -= 3;
}
}
int l = length / 8;
int r = length - 8 * l;
int num0 = 0;
if (r != 0) {
num0 = 8 - r;
length += num0;
}
int codes[length], place = 0;
single = 0;
for (int i = 0; i < x; i++) {
if (signals[i] == 3) {
num2b(0, 3, &codes[place]);
num2b(charas[i].chara, 8, &codes[place+3]);
num2b(0, 3, &codes[place+11]);
place += 14;
}
else if (signals[i] == 1) {
single = 1;
num2b(0, 3, &codes[place]);
num2b(charas[i].chara, 8, &codes[place+3]);
place += 11;
}
else if (signals[i] == 2) {
single = 0;
num2b(charas[i].chara, 8, &codes[place]);
num2b(0, 3, &codes[place+8]);
place += 11;
}
else if (signals[i] != 0) {
int a = signals[i] / 10;
int b = signals[i] - 10 * a;
for (int time = 0; time < a; time++) {
num2b(7, 3, &codes[place]);
num2b(charas[i].chara, 8, &codes[place+3]);
place += 11;
}
if (b != 0) {
num2b(b - 1, 3, &codes[place]);
num2b(charas[i].chara, 8, &codes[place+3]);
place += 11;
}
}
else if (single == 1) {
num2b(charas[i].chara, 8, &codes[place]);
place += 8;
}
else {
num2b(charas[i].num - 1, 3, &codes[place]);
num2b(charas[i].chara, 8, &codes[place+3]);
place += 11;
}
}
for (int time = 0; time < num0; time++) {
codes[place] = 0;
place += 1;
}
for (int k = 0; k < length; k += 4) {
fputc(b2x(&codes[k]), fpt);
}
fclose(fpt);
free(text);
return 0;
}