/* A simple search engine: the user enters keywords, which are then looked up in .txt files. */
#define __STDC_WANT_LIB_EXT1__ 1
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
/* Number of slots added to a word array each time it has to grow. */
#define COUNT 5
/* Size in bytes of the line and query buffers (terminating NUL included). */
#define LENGTH 256
/* Characters that separate words during tokenizing: space and newline. */
char delimiters[] = " \n";
/* File-scope match flag; starts out false. */
bool find = false;
/* Pairs a stdio stream with the name of the file it belongs to. */
typedef struct File
{
/* stream handle */
FILE *pfile;
/* name of the file */
char *filename;
}File;
/* The query file: named "query.txt", with no stream attached yet (NULL). */
File query = {.filename = "query.txt", .pfile = NULL};
/* A list of word strings together with its entry count. */
typedef struct Tokenize
{
/* number of entries in pwords */
int count;
/* array of word strings */
char **pwords;
}Tokenize;
/* Forward declarations of the functions defined after main(). */

/* Tokenizing of a query string / of one line read from a stream. */
int tokenizing(char *str, char **pWords);
int get_content(char **pWords, int wordCount, FILE *pfile);

/* Document file handling. */
int get_file(char *filename, int num);
void open_file(FILE *pfile, char *filename, char *mode);
void set_position(FILE *pfile);

/* Word-list comparison: count_1/pWords_1 and count_2/pWords_2 are two word lists. */
bool core_search(int count_1, int count_2, char **pWords_1, char **pWords_2);
bool exact_search(int count_1, int count_2, char **pWords_1, char **pWords_2);

/* Releases `count` strings and the array that holds them. */
void free_memory(char **pStrs, int count);
/*
 * Prints one search hit.  queryID is 0 in manual mode, where only the
 * document id is printed; script mode passes the 1-based query number.
 */
static void report_hit(const char *label, int queryID, int fileID)
{
    if(queryID > 0)
    {
        printf("%s: q%d d%d\n", label, queryID, fileID);
    }
    else
    {
        printf("%s: d%d\n", label, fileID);
    }
}

/*
 * Walks through the documents and reports every line-level match of the
 * query `qa` under the selected search mode(s).
 *
 * NOTE(review): get_file(), open_file() and get_content() take the file
 * name, the stream, the word array and the word count BY VALUE, so whatever
 * they produce cannot be stored in `data` or `content` here.  Their
 * signatures need pointer-to-pointer (or struct pointer) parameters before
 * this loop can deliver real results.  Both objects are zero-initialized so
 * that the situation is at least deterministic.
 */
static void search_documents(bool use_core, bool use_exact, const Tokenize *qa, int queryID)
{
    File data = {0};
    Tokenize content = {0};
    int fileID = 0;

    while(get_file(data.filename, fileID))
    {
        open_file(data.pfile, data.filename, "r");
        if(!data.pfile)
        {
            /* open_file() terminates on failure, so a NULL stream here means
               the opened stream was never handed back to this function. */
            fprintf(stderr, "Document stream unavailable; stopping search.\n");
            break;
        }
        set_position(data.pfile);
        while(get_content(content.pwords, content.count, data.pfile))
        {
            /* A hit is reported when the search function returns true. */
            if(use_core && core_search(qa->count, content.count, qa->pwords, content.pwords))
            {
                report_hit("Core Search", queryID, fileID);
            }
            if(use_exact && exact_search(qa->count, content.count, qa->pwords, content.pwords))
            {
                report_hit("Exact Search", queryID, fileID);
            }
            free_memory(content.pwords, content.count);
        }
        /* Close once per document, after its last line has been read;
           closing inside the line loop would leave later reads on a closed
           stream. */
        fclose(data.pfile);
        data.pfile = NULL;
        /* Move on to the next document id for the following get_file() call. */
        ++fileID;
    }
}

/*
 * Entry point.
 *
 * argv[1] selects the input mode: "manual" reads query terms from stdin,
 * "script" first records queries (one per line) into query.txt and then
 * runs each of them.
 * argv[2] selects the search: "coreSearch" or "exactSearch".
 * NOTE(review): "topSearch" and "topKSearch" are listed in mode_2 but no
 * code path handles them.
 *
 * Returns 0 on success and 1 on a usage, input-length or file error.
 */
int main(int argc, char *argv[])
{
    char mode_1[2][10] = {"manual", "script"};
    char mode_2[4][15] = {"coreSearch", "exactSearch", "topSearch", "topKSearch"};
    char QA[LENGTH] = "";   /* must start empty: manual mode appends to it */
    char buf[LENGTH];
    int queryID = 0;
    Tokenize qa = {0};

    /* argv[1] and argv[2] are both dereferenced below. */
    if(argc < 3)
    {
        fprintf(stderr, "Usage: %s <manual|script> <coreSearch|exactSearch>\n",
                argc > 0 ? argv[0] : "search");
        return 1;
    }

    const bool use_core = strcmp(argv[2], mode_2[0]) == 0;
    const bool use_exact = strcmp(argv[2], mode_2[1]) == 0;

    if(strcmp(argv[1], mode_1[0]) == 0)
    {
        printf("\n\nEnter query terms with up to %d characters per line.\n"
               "Terminate input by entering an empty line:\n", LENGTH - 1);
        /* Stop on an empty line and also on end of input / read error. */
        while(fgets(buf, LENGTH, stdin) && buf[0] != '\n')
        {
            if(strcat_s(QA, LENGTH, buf))
            {
                printf("Something's wrong. Maximum permitted input length exceeded.\n");
                return 1;
            }
        }
        qa.count = tokenizing(QA, qa.pwords);
        search_documents(use_core, use_exact, &qa, 0);
        /* NOTE(review): tokenizing() receives qa.pwords by value, so the
           pointer is still NULL here; only free what was really handed back. */
        if(qa.pwords)
        {
            free_memory(qa.pwords, qa.count);
        }
    }
    else if(strcmp(argv[1], mode_1[1]) == 0)
    {
        /* The query file is opened directly: open_file() takes the FILE * by
           value and therefore cannot store the stream in query.pfile. */
        query.pfile = fopen(query.filename, "w");
        if(!query.pfile)
        {
            printf_s("Error opening %s for writing. Program terminated.\n", query.filename);
            return 1;
        }
        setbuf(query.pfile, NULL);
        /* Adjacent literals (no comma) so the whole prompt is printed. */
        printf_s("\n\nPlease create your query file. \nEnter search queries "
                 "(1 per line) identified by the query ID or press Enter to end:\n");
        while(fgets(QA, LENGTH, stdin) && QA[0] != '\n')
        {
            if(EOF == fputs(QA, query.pfile))
            {
                printf_s("Error writing file.\n");
                fclose(query.pfile);
                query.pfile = NULL;
                return 1;
            }
        }
        fclose(query.pfile);

        query.pfile = fopen(query.filename, "r");
        if(!query.pfile)
        {
            printf_s("Error opening %s for reading. Program terminated.\n", query.filename);
            return 1;
        }
        setbuf(query.pfile, NULL);
        while(fgets(QA, LENGTH, query.pfile))
        {
            ++ queryID;
            qa.count = tokenizing(QA, qa.pwords);
            search_documents(use_core, use_exact, &qa, queryID);
            if(qa.pwords)
            {
                free_memory(qa.pwords, qa.count);
            }
        }
        fclose(query.pfile);
        query.pfile = NULL;
    }
    return 0;
}
/*
 * Splits str at the characters in `delimiters` and counts the distinct words.
 *
 * str     NUL-terminated text to tokenize; it is only read, never modified.
 *         A NULL pointer is treated like an empty string.
 * pWords  NOTE(review): this pointer is received by value, so a word list
 *         built here cannot be handed back through it.  Returning the list
 *         requires a `char ***` parameter (or returning the array) plus a
 *         matching change at the call sites.  Until then the list is used
 *         only to detect duplicates and is released before returning
 *         instead of being leaked.
 *
 * Returns the number of distinct words (always at least 1).
 * Terminates the program with exit(1) when str contains no word or when
 * memory cannot be allocated.
 */
int tokenizing(char *str, char **pWords)
{
    size_t capacity = 10;
    size_t used = 0;
    int wordCount = 0;
    char **words = NULL;
    const char *cursor = str ? str : "";

    (void)pWords;

    words = calloc(capacity, sizeof *words);
    if(!words)
    {
        fprintf(stderr, "Out of memory while tokenizing.\n");
        exit(1);
    }

    /* strspn/strcspn are used instead of the optional Annex K strtok_s so
       the word length is known exactly and does not depend on tokenizer
       internals. */
    for(;;)
    {
        cursor += strspn(cursor, delimiters);
        if(*cursor == '\0')
        {
            break;
        }
        const size_t length = strcspn(cursor, delimiters);

        bool newWord = true;
        for(size_t i = 0 ; i < used ; ++i)
        {
            if(strncmp(words[i], cursor, length) == 0 && words[i][length] == '\0')
            {
                newWord = false;
                break;
            }
        }

        if(newWord)
        {
            if(used == capacity)
            {
                /* Grow through a temporary so the old block is not lost if
                   realloc fails. */
                char **grown = realloc(words, (capacity + COUNT) * sizeof *words);
                if(!grown)
                {
                    fprintf(stderr, "Out of memory while tokenizing.\n");
                    exit(1);
                }
                words = grown;
                capacity += COUNT;
            }
            /* +1 for the terminating NUL. */
            words[used] = malloc(length + 1);
            if(!words[used])
            {
                fprintf(stderr, "Out of memory while tokenizing.\n");
                exit(1);
            }
            memcpy(words[used], cursor, length);
            words[used][length] = '\0';
            ++used;
        }
        cursor += length;
    }

    if(used == 0)
    {
        free(words);
        printf("No words found. Ending program.\n");
        exit (1);
    }

    wordCount = (int)used;
    for(size_t i = 0 ; i < used ; ++i)
    {
        free(words[i]);
    }
    free(words);
    return wordCount;
}
/*
 * Builds the name of document number num + 1 ("001.txt", "012.txt",
 * "123.txt", ...) and reports whether that file can be opened for reading.
 *
 * filename  NOTE(review): received by value, so the generated name cannot
 *           be handed back through it; returning the name requires a
 *           caller-supplied buffer or a `char **` parameter plus a change
 *           at the call sites.  The argument is ignored here.
 * num       zero-based document index; negative values are rejected.
 *
 * Returns 1 when the file exists and is readable, 0 otherwise.
 * NOTE(review): the function previously had no return statement at all;
 * "file exists" is used because the caller loops while the result is
 * non-zero - confirm that this is the intended condition.
 */
int get_file(char *filename, int num)
{
    char name[32];   /* large enough for any int plus ".txt" */
    FILE *pfile = NULL;
    int written = 0;

    (void)filename;

    if(num < 0)
    {
        return 0;
    }

    /* %03lld zero-pads to three digits, matching the "00"/"0" prefixes used
       for one- and two-digit numbers; long long avoids overflow of num + 1. */
    written = snprintf(name, sizeof name, "%03lld.txt", (long long)num + 1);
    if(written < 0 || (size_t)written >= sizeof name)
    {
        return 0;
    }

    pfile = fopen(name, "r");
    if(!pfile)
    {
        return 0;
    }
    fclose(pfile);
    return 1;
}
/*
 * Verifies that `filename` can be opened with `mode`; prints an error and
 * terminates the program with exit(1) when it cannot.
 *
 * pfile     NOTE(review): received by value, so the opened stream cannot be
 *           handed back to the caller.  Returning it requires a `FILE **`
 *           parameter (or a FILE * return value) plus a change at the call
 *           sites.  Until then the stream is closed again here rather than
 *           leaked.
 * filename  name of the file to open; NULL is reported as an error.
 * mode      fopen() mode string; NULL is reported as an error.  Note that
 *           "w" still creates or truncates the file.
 */
void open_file(FILE *pfile, char *filename, char *mode)
{
    pfile = (filename && mode) ? fopen(filename, mode) : NULL;
    if(!pfile)
    {
        /* Report the file and mode that were actually requested. */
        printf("Error opening %s with mode \"%s\". Program terminated.\n",
               filename ? filename : "(null)", mode ? mode : "(null)");
        exit(1);
    }
    fclose(pfile);
}
/*
 * Repositions pfile while looking for two '>' characters.
 *
 * Each pass seeks to `offset` (which starts at -1 and only decreases) and
 * reads one character.  When that character is '>', offset is decreased,
 * the stream is sought again and a second character is read; the loop ends
 * only when that second character is '>' as well.
 *
 * NOTE(review): the negative offsets are passed with SEEK_SET.  Offsets
 * counted back from the end of the file would use SEEK_END - confirm which
 * origin was intended, and where the stream should be left afterwards.
 * NOTE(review): the results of fseek() are not checked and fgetc() is never
 * compared with EOF; the loop has no other exit, so a stream without such a
 * pair appears never to return - confirm and add a termination condition.
 */
void set_position(FILE *pfile)
{
int offset = -1;
while(true)
{
fseek(pfile, offset, SEEK_SET);
if(fgetc(pfile) == '>')
{
/* first '>' seen: step one position further and test the next read */
-- offset;
fseek(pfile, offset, SEEK_SET);
if(fgetc(pfile) == '>') break;
}
--offset;
}
}
/*
 * Reads the next line (at most LENGTH - 1 characters) from pfile, splits it
 * at the characters in `delimiters` and counts its words.
 *
 * pWords     NOTE(review): received by value, so the word list built here
 *            cannot be handed back through it.
 * wordCount  NOTE(review): also received by value; the count is therefore
 *            made available as the return value.
 *            Handing the list back requires `char ***` / `int *` parameters
 *            (or a Tokenize pointer) plus a change at the call sites.
 *            Until then the list is released before returning instead of
 *            being leaked.
 * pfile      stream to read from; NULL is treated like end of input.
 *
 * Returns the number of words on the line, or 0 when nothing could be read
 * or the line contains no word.  Terminates the program with exit(1) when
 * memory cannot be allocated.
 */
int get_content( char **pWords, int wordCount, FILE *pfile)
{
    char line[LENGTH];
    size_t capacity = 50;
    size_t used = 0;
    char **words = NULL;
    const char *cursor = line;

    (void)pWords;

    /* Nothing to tokenize at end of input or on a read error. */
    if(!pfile || !fgets(line, sizeof line, pfile))
    {
        return 0;
    }

    /* The array holds pointers, so its element size is sizeof(char *). */
    words = calloc(capacity, sizeof *words);
    if(!words)
    {
        fprintf(stderr, "Out of memory while reading content.\n");
        exit(1);
    }

    for(;;)
    {
        cursor += strspn(cursor, delimiters);
        if(*cursor == '\0')
        {
            break;
        }
        const size_t length = strcspn(cursor, delimiters);

        if(used == capacity)
        {
            /* Grow through a temporary so the old block is not lost if
               realloc fails. */
            char **grown = realloc(words, (capacity + COUNT) * sizeof *words);
            if(!grown)
            {
                fprintf(stderr, "Out of memory while reading content.\n");
                exit(1);
            }
            words = grown;
            capacity += COUNT;
        }
        /* +1 for the terminating NUL. */
        words[used] = malloc(length + 1);
        if(!words[used])
        {
            fprintf(stderr, "Out of memory while reading content.\n");
            exit(1);
        }
        memcpy(words[used], cursor, length);
        words[used][length] = '\0';
        ++used;
        cursor += length;
    }

    wordCount = (int)used;
    for(size_t i = 0 ; i < used ; ++i)
    {
        free(words[i]);
    }
    free(words);
    return wordCount;
}
/*
 * Reports whether at least one word of the first list occurs in the second.
 *
 * count_1   number of entries in pWords_1
 * count_2   number of entries in pWords_2
 * pWords_1  first word list (the query), NUL-terminated strings
 * pWords_2  second word list (the content), NUL-terminated strings
 *
 * Returns true as soon as a word of pWords_1 equals a word of pWords_2 and
 * false otherwise, including when either list is empty.
 *
 * The result is computed locally and every path returns a value, so neither
 * a later non-matching word nor a leftover value from an earlier call can
 * change the answer.
 */
bool core_search(int count_1, int count_2, char **pWords_1, char **pWords_2)
{
    for(int i = 0 ; i < count_1 ; ++i)
    {
        for(int j = 0 ; j < count_2 ; ++j)
        {
            if(strcmp(pWords_1[i], pWords_2[j]) == 0)
            {
                return true;
            }
        }
    }
    return false;
}
/*
 * Reports whether every word of the first list occurs in the second.
 *
 * count_1   number of entries in pWords_1
 * count_2   number of entries in pWords_2
 * pWords_1  first word list (the query), NUL-terminated strings
 * pWords_2  second word list (the content), NUL-terminated strings
 *
 * Returns false as soon as one word of pWords_1 is missing from pWords_2
 * and true when all of them are present.  An empty first list yields true.
 *
 * Each word is checked with a local flag, so no per-word array is needed
 * and the answer never depends on a value left over from an earlier call.
 */
bool exact_search(int count_1, int count_2, char **pWords_1, char **pWords_2)
{
    for(int i = 0 ; i < count_1 ; ++i)
    {
        bool present = false;
        for(int j = 0 ; j < count_2 && !present ; ++j)
        {
            present = strcmp(pWords_1[i], pWords_2[j]) == 0;
        }
        if(!present)
        {
            return false;
        }
    }
    return true;
}
/*
 * Releases a heap-allocated array of heap-allocated strings.
 *
 * pStrs  array whose first `count` entries were obtained from malloc (or
 *        are NULL); a NULL array is accepted and ignored.
 * count  number of entries to release.
 *
 * The caller's own pointer is not changed and must not be used afterwards.
 */
void free_memory(char **pStrs, int count)
{
    if(!pStrs)
    {
        return;
    }
    for(int i = 0 ; i < count ; ++i)
    {
        free(pStrs[i]);
    }
    free(pStrs);
}