参考代码写了一个lzw压缩程序,原先的txt文件4.8k,压缩后成为4.2k。可是无法还原,只能还原前面一部分。我用fgetc一个一个读出数据,发现到第291个数据就终止了,然后vs响15声。这是什么原因?
压缩输出时我监测了输出数据:第291个之后直到第2000多个数据c都是正常的,并且都用fputc输出了。把压缩后的文件复制粘贴到解压缩程序的文件夹后就出问题了
解压缩的时候在第291个数据处开始读到EOF,之后所有的数据用fgetc读都是EOF。也就是说只有前面291个是正常数据,后面全写了-1进去,生成一个文件
附程序
//4.27添加修改0的模块,并避免溢出情形
#ifndef _COMPRESSDO_H
#define _COMPRESSDO_H
#endif
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
// Integer stand-in for a boolean type and its two truth values (TRUE/FALSE below).
#define BOOL int
// compress() stops adding dictionary entries once codeUsed reaches MAX_CODES.
#define MAX_CODES 4096
#define TRUE 1
#define FALSE 0
// Key value that marks an empty slot in table[]; get() also returns it for "not found".
#define NOT_USED 0
// Number of slots in table[].
#define HASH_SIZE 4096
// VERBOSE and MAXSTRING are not referenced in the visible part of this file.
#define VERBOSE 1
#define MAXSTRING 1000
// Shift width used when a byte is appended to the current string (compress)
// and when a code is split into bytes (output_encode).
const int BYTE_SIZE = 8;
// Shift width for the 4 bits that do not fit into a whole byte.
const int EXCESS = 4;
// Number of single-byte entries compress() preloads into the dictionary.
const int ALPHA= 256;
// MASK and overnum are not referenced in the visible part of this file.
const int MASK = 15;
int overnum=0;
// Low 4 bits of the previous code, waiting to be written by output_encode().
int bileftOver;
// Non-zero while bileftOver holds bits that have not been written yet.
int bitsLeftOver = 0;
// Shared counter used by compress(): bumped while pcode is 0, reset after each emitted code.
int flag;
unsigned int get(unsigned int key,int flag);
int fgetrealc();
unsigned int hash_code(unsigned int key,int flag);
// NOTE(review): the compress() defined in this file takes no parameters, so this
// prototype declares a different overload that has no visible definition.
void compress(char *Filename);
int compressdo(char *Filename);
void init_hashtable(void);
// NOTE(review): no definition of output() is visible in this file.
void output(unsigned int pcode,int flag);
//void print_binary(int i);
void put(unsigned int key, int flag,unsigned int b);
void output_encode(unsigned int pcode,int flag);
// NOTE(review): the function defined in this file is named set_files1, not set_file1.
void set_file1(char *file);
// One dictionary slot: the stored string value and its accompanying flag.
struct HASH_TABLE{
unsigned int key;   // string value; NOT_USED marks an empty slot
int flag;           // stored alongside key and compared together with it on lookup
}table[HASH_SIZE];// table is the instance declared together with the struct: an array of HASH_SIZE (4096) slots
// The slot index i is the code word, key is the character/string value, and flag is the
// number of leading zeros in that value.
FILE *in, *out; // file pointers of the source and destination
int q[10];// look-ahead buffer filled by fgetrealc(); held as int, so an entry must be checked to be >0 before it is output
int l=-1;// index into q[]; starts at -1 (presumably "buffer empty" — confirm against fgetrealc)
const int MASK1 = 255;// selects the low 8 bits of a code in output_encode()
const int MASK2 = 15;// selects the low 4 bits of a code in output_encode()
// Checks whether (key, flag) is already present in the dictionary.
// Returns the slot index chosen by hash_code(), or NOT_USED when that slot
// is still empty (i.e. the pair has not been stored).
unsigned int get(unsigned int key,int flag){
    const unsigned int slot = hash_code(key, flag);
    return (table[slot].key == NOT_USED) ? NOT_USED : slot;
}
// Probes table[] linearly, starting at key % HASH_SIZE, for the pair (key, flag).
//
// Returns the index of the first slot that either holds exactly (key, flag)
// or is empty (key == NOT_USED). If the probe wraps all the way around
// without meeting either, the starting index is returned.
//
// The former `i < 0` adjustment was removed: the index is unsigned and can
// never be negative. The match test now uses logical && instead of bitwise &.
unsigned int hash_code(unsigned int key,int flag){
    const unsigned int start = key % HASH_SIZE;
    unsigned int slot = start;

    do {
        if (table[slot].key == key && table[slot].flag == flag)
            return slot;            // exact match
        if (table[slot].key == NOT_USED)
            return slot;            // first empty slot ends the probe
        slot = (slot + 1) % HASH_SIZE;
    } while (slot != start);

    return slot;                    // wrapped around: slot == start again
}
// Encodes the stream `in` into `out`.
// Bytes are read with fgetrealc(), strings are looked up with get() and added with
// put(), and codes are written through output_encode(). Both streams are closed
// before returning. The global `flag` is bumped whenever the current string value
// is 0 and reset after each emitted code.
void compress(){
int i, codeUsed, ccc,e;
unsigned int pcode, k;
init_hashtable();
// Preload one dictionary entry per single byte value, stored at its own index.
for(i=0; i<ALPHA; i++)
put(i,0,i);
codeUsed = ALPHA;
ccc = fgetrealc();
if(ccc!=EOF){
pcode = ccc;
ccc = fgetrealc();
// j is a debug counter printed after most output_encode() calls
// (the pcode==0 branch further down does not bump it).
int j=0;
while(ccc!=EOF){
//j++;
//cout<<j<<endl;
if(pcode>16777215 )// avoid overflow: above 0xFFFFFF, pcode<<BYTE_SIZE no longer fits in 32 bits (assumes 32-bit unsigned int)
{
// Emit the current string as-is and restart from the byte just read.
output_encode(pcode,flag);
j++;
cout<<j<<endl;
flag=0;
pcode=ccc;}
else{
if(pcode ==0)
flag++;
// Candidate string: current string value with the new byte appended.
k = (pcode<<BYTE_SIZE)+ccc;
e = get(k,flag);
if(e==NOT_USED) //not in dictionary
{
// Add the new string while code words are still available.
if(codeUsed<MAX_CODES )
put( k,flag,codeUsed++);
// flag was bumped above when pcode is 0, so undo that for the lookup of pcode itself.
if(pcode==0)
output_encode(pcode,flag-1);
else
{output_encode(pcode,flag);
j++;
cout<<j<<endl;
}
flag=0;
pcode = ccc;
}
else
pcode = k;// this can overflow (guarded by the 16777215 check on the next iteration)
}
ccc=fgetrealc();
}
// Emit the code for whatever string is still pending at end of input.
output_encode(pcode,flag);
j++;
cout<<j<<endl;
// Flush the pending 4 bits, padded with 4 zero bits, as a final byte.
if(bitsLeftOver)
fputc(bileftOver<<EXCESS, out);
}
fclose(in);
fclose(out);
}
// Marks every slot of table[] as unused and clears its flag.
void init_hashtable(void){
    int slot = HASH_SIZE;
    while (slot-- > 0) {
        table[slot].flag = 0;
        table[slot].key = NOT_USED;
    }
}
int fgetrealc()
{
int c,code;
int t=0;
if(l
{
c = fgetc(in);
// cout
if(c == EOF)
{
for(int i=9;i>=0;i--){
q[i]=fgetc(in);
if(q[i]==-1)
t++;
}
if(t==10)
return EOF;
else
l=9;
}
else
{
if(c
c+=256;
return c;
}
}
if(l>=0)
{
if(c<0)
c+=256;
code=c;
c=q[l];
l--;
return code;
}
}
// Writes the code word of (pcode, flag) to `out`, packing two code words
// into three bytes.
//
// The string value is only turned into its code word here, via get().
// With no bits pending, the code's upper part (code >> EXCESS) is written as
// one byte and its low 4 bits are parked in bileftOver. With bits pending,
// those 4 bits are combined with the code's part above the low byte, written,
// and followed by the code's low 8 bits.
void output_encode(unsigned int pcode,int flag){
    const unsigned int code = get(pcode, flag);

    if (!bitsLeftOver) {
        // Nothing pending: emit one byte, keep the low nibble for next time.
        bileftOver = code & MASK2;
        const unsigned int upper = code >> EXCESS;
        fputc(upper, out);
        bitsLeftOver = 1;
    } else {
        // A nibble is pending: emit it together with this code as two bytes.
        const unsigned int lowByte = code & MASK1;
        const unsigned int first = (bileftOver << EXCESS) + (code >> BYTE_SIZE);
        fputc(first, out);
        fputc(lowByte, out);
        bitsLeftOver = 0;
    }
}
// Debug helper: prints the low 8 bits of i, most significant bit first,
// followed by a newline.
void print_binary(int i){
    int bit = 8;
    while (bit-- > 0) {
        printf("%i", (i >> bit) & 1);
    }
    printf("\n");
}
// Stores (key, flag) in slot b of table[].
//
// The slot index is supplied by the caller; it is not derived from
// hash_code(). If the slot is already occupied the program prints an
// internal-error message and terminates with exit status 1: "duplicate"
// when the slot holds the same (key, flag), "table full" otherwise.
void put(unsigned int key, int flag,unsigned int b){
    if (table[b].key != NOT_USED) {
        // Neither case is expected to happen during normal operation.
        if ((table[b].key == key) & (table[b].flag == flag))
            printf("Internal error occur during hashing:duplicate");
        else
            printf("Internal error occur during hashing:table full");
        exit(1);
    }

    table[b].key = key;
    table[b].flag = flag;
}
// Opens the source file `file` for reading into `in` and creates the output
// file "testtxt.lzw" for writing into `out`.
//
// Both streams are opened in binary mode. The packed code stream is arbitrary
// binary data, and in text mode some platforms (notably Windows) translate
// line endings on output and treat a 0x1A byte as end-of-file on input, which
// makes a file appear to end early.
//
// If either file cannot be opened, the reason is reported on stderr and the
// program exits with status 1 instead of continuing with a NULL stream.
void set_files1(char *file){
    in = fopen(file, "rb");
    if (in == NULL) {
        perror(file);
        exit(1);
    }

    out = fopen("testtxt.lzw", "wb");
    if (out == NULL) {
        perror("testtxt.lzw");
        fclose(in);
        in = NULL;
        exit(1);
    }
}
// Returns the size in bytes of the file at `filename`, or -1 when stat() fails.
//
// stat() takes the file path and a pointer to a struct stat, fills the
// structure with the file's attributes, and the st_size field is returned.
long get_file_size( char * filename ) {
    struct stat info;
    const int rc = stat(filename, &info);
    return (rc == -1) ? -1L : (long)info.st_size;
}
// Prints the command-line usage text to standard output.
void printusage (void) {
    static const char *const usage[] = {
        "Usage:lzw -flag source \n",
        "flag should be encode or decode\n",
        "When the flag is decode, the source file should be the format of \"*.lzw\" \n",
        "Example: \"lzw -encode test.c\" ,then the output file would be test.c.lzw \n",
    };
    const unsigned int count = sizeof usage / sizeof usage[0];
    for (unsigned int n = 0; n < count; ++n) {
        fputs(usage[n], stdout);
    }
}
/************ Main Function *****************************************************************/
// Compresses `Filename` into "testtxt.lzw" and reports statistics on cout:
// the input size, the elapsed wall-clock time in seconds, the output size and
// the ratio input size / output size. Pauses via system("pause") before
// returning 0.
int compressdo(char *Filename){
    cout<<"encoding"<<endl;

    const time_t started = time(NULL);
    const double inputBytes = get_file_size(Filename);
    cout<<"filesize1="<<inputBytes<<endl;

    set_files1(Filename);
    compress();

    const time_t finished = time(NULL);
    cout<<"timecost="<<difftime(finished,started)<<endl;

    const double outputBytes = get_file_size("testtxt.lzw");
    cout<<"filesize2="<<outputBytes<<endl;

    const double ratio = inputBytes/outputBytes;
    cout<<"lzwrate="<<ratio<<endl;

    system("pause");
    return 0;
}
/*else if (temp_flag==1) { // decode
printf ("Decoding %s ......\n", argv[2]);
set_files_decode(outname);
tm = time(NULL);
printf(ctime(&tm));
decompress();
tm = time(NULL);
printf(ctime(&tm));
filesize=get_file_size(inname);
printf("%l",filesize);
return(0);
}
printusage();
return(1);
}*/