#pragma once
#include<string.h>
#include<stdio.h>
struct Data
{
public:
char* name;
int num;
char* type;
};
class ChainListNode
{
public:
Data data;
ChainListNode* nextNode;
};
class ChainListHeadNode
{
public:
ChainListHeadNode()
{
nextNode = nullptr;
}
ChainListNode* nextNode;
};
class ChainList
{
public:
ChainListHeadNode* firstNode;
ChainList()
{
length = 0;
firstNode = new ChainListHeadNode;
firstNode->nextNode = nullptr;
}
~ChainList()
{
ChainListNode* current;
current = firstNode->nextNode;
while (firstNode->nextNode)
{
current = current->nextNode;
delete firstNode->nextNode;
firstNode->nextNode = current;
}
delete firstNode;
}
bool Insert(char* newName, int newNum, char* newType)
{
ChainListNode* newNode = new ChainListNode;
newNode->data.name = _strdup(newName);
newNode->data.num = newNum;
newNode->data.type = _strdup(newType);
newNode->nextNode = firstNode->nextNode;
firstNode->nextNode = newNode;
length++;
return true;
}
ChainListNode* Search(char* searchKey)
{
ChainListNode* current = firstNode->nextNode;
for (int i = 0; i < length; i++)
{
if (strcmp(searchKey, current->data.name) == 0)
return current;
if (current->nextNode == nullptr)
return nullptr;
current = current->nextNode;
}
return nullptr;
}
int getListLength()
{
return length;
}
private:
int length;
};
#define _CRT_SECURE_NO_WARNINGS;
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <string>
#include"head.h"
using namespace std;
char KEYWORD[] = "保留字";
char ID[] = "标识符";
char NUM[] = "常数";
char SEPARATOR[] = "分隔符号";
char ARITHMETIC[] = "算术运算符";
char RELATIONAL[] = "关系运算符";
char SPECIAL[] = "特殊符号";
char keyWord[200][200] = {"abstract","auto","break","case","char","class","const",
"continue","default","delete","double","enum","extern","f","F","far","final",
"float","for","friend","goto","if","inline","int","interrupt","l","L","long",
"new","operator","private","protected","public","register","return","short",
"signed","sizeof","static","struct","switch","typedef","u","U","union","unsigned",
"virtual","void","while","main","else","wchar_t"};
char entry[200] = "";
FILE* cstream, * outstream;
int sEnd[] = {2,3,4,6,7,8,11,12,14,15,17,18,20,21,23,24,26,27,29,30,32,33,35,36,37,45,47,48};
int currentNum = 0;
int numOfKeyWord = 0;
int numOfID = 0;
int numOfNum = 0;
int numOfSeparator = 0;
int numOfArithmetic = 0;
int numOfRelational = 0;
int numOfSpecial = 0;
bool ifflag = true;
ChainList keyWordLt;
ChainList keyWordList;
ChainList idList;
ChainList numList;
ChainList separatprList;
ChainList arithmeticList;
ChainList relationalList;
ChainList specialList;
int move(int s, char ch);
int getLength(char entry[]);
void AutoForLexical(FILE* cstream, FILE* outstream);
int findEnd(int s);
int main()
{
if ((cstream = fopen("d:\\sample.txt", "r")) == NULL)
{
printf("Failed to open sample.txt!");
exit(0);
}
if ((outstream = fopen("d:\\text1.txt", "w")) == NULL)
{
printf("Failed to open text1.txt");
exit(0);
}
while (strcmp("", keyWord[currentNum]) != 0)
{
keyWordLt.Insert(keyWord[currentNum], currentNum, KEYWORD);
currentNum++;
}
AutoForLexical(cstream, outstream);
ChainListNode* temp;
fprintf(outstream,"%s \n","标识符表");
temp = idList.firstNode->nextNode;
for (int i = 0; i <= idList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num,temp->data.name,temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "保留字");
temp = keyWordLt.firstNode->nextNode;
for (int i = 0; i <= keyWordLt.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "常数表");
temp = numList.firstNode->nextNode;
for (int i = 0; i <= numList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "分隔符号表");
temp = separatprList.firstNode->nextNode;
for (int i = 0; i <= separatprList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "算术运算符表");
temp = arithmeticList.firstNode->nextNode;
for (int i = 0; i <= arithmeticList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "关系运算符表");
temp = relationalList.firstNode->nextNode;
for (int i = 0; i <= relationalList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fprintf(outstream, "%s \n", "特殊符号表");
temp = specialList.firstNode->nextNode;
for (int i = 0; i <= specialList.getListLength() - 1; i++)
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
//printf("%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
printf("\n");
}
fclose(cstream);
fclose(outstream);
system("pause");
}
void AutoForLexical(FILE* cstream, FILE* outstream)
{
char ch;
int s0 = 0;
int s = s0;
memset(entry, '\0', sizeof(entry));
ch = fgetc(cstream);
while (ch!=EOF)
{
s = move(s, ch);
if (findEnd(s) != -1)
{
if (s >= 2 && s <= 18)//算术运算符
{
ChainListNode* temp = arithmeticList.Search(entry);
if (temp==nullptr)
{
arithmeticList.Insert(entry, numOfArithmetic, ARITHMETIC);
numOfArithmetic++;
}
else
{
fprintf(outstream,"%d %s %s\n",temp->data.num,temp->data.name,temp->data.type);
}
}
else if (s==48)//特殊字符
{
ChainListNode* temp = specialList.Search(entry);
if (temp == nullptr)
{
specialList.Insert(entry, numOfSpecial, SPECIAL);
numOfSpecial++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s==37)//分隔符号
{
if ((int)ch == 126 || (int)ch == 94)
{
ChainListNode* temp = arithmeticList.Search(entry);
if (temp == nullptr)
{
arithmeticList.Insert(entry, numOfArithmetic, ARITHMETIC);
numOfArithmetic++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if ((int)ch == 34 || (int)ch == 39)
{
ChainListNode* temp = specialList.Search(entry);
if (temp == nullptr)
{
specialList.Insert(entry, numOfSpecial, SPECIAL);
numOfSpecial++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else
{
ChainListNode* temp = separatprList.Search(entry);
if (temp == nullptr)
{
separatprList.Insert(entry, numOfSeparator, SEPARATOR);
numOfSeparator++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
}
else if (s == 30)
{
ChainListNode* temp = separatprList.Search(entry);
if (temp == nullptr)
{
separatprList.Insert(entry, numOfSeparator, SEPARATOR);
numOfSeparator++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s>=20 && s<=36 && s!=30)//关系运算符
{
ChainListNode* temp = relationalList.Search(entry);
if (temp == nullptr)
{
relationalList.Insert(entry, numOfRelational, RELATIONAL);
numOfRelational++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else if (s==47)//标识符和保留字
{
if (keyWordLt.Search(entry))
{
ChainListNode* temp = keyWordList.Search(entry);
if (temp == nullptr)
{
keyWordList.Insert(entry, numOfKeyWord, KEYWORD);
numOfKeyWord++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
else
{
ChainListNode* temp = idList.Search(entry);
if (temp == nullptr)
{
idList.Insert(entry, numOfID, ID);
numOfID++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
}
else if (s==45)//常数
{
ChainListNode* temp = numList.Search(entry);
if (temp == nullptr)
{
numList.Insert(entry, numOfNum, NUM);
numOfNum++;
}
else
{
fprintf(outstream, "%d %s %s\n", temp->data.num, temp->data.name, temp->data.type);
}
}
memset(entry,'\0',sizeof(entry));
s = s0;
}
if (ifflag)
ch = fgetc(cstream);
else
ifflag = true;
}
}
int move(int s, char ch)
{
if ((int)ch == 43 && s == 0)
{
entry[getLength(entry)] = ch;
return 1;
}
else if((int)ch == 61 && s == 1)
{
entry[getLength(entry)] = ch;
return 2;
}
else if ((int)ch == 43 && s == 1)
{
entry[getLength(entry)] = ch;
return 3;
}
else if (s == 1)
{
ifflag = false;
return 4;
}
else if ((int)ch == 38 && s == 0)
{
entry[getLength(entry)] = ch;
return 5;
}
else if ((int)ch == 61 && s == 5)
{
entry[getLength(entry)] = ch;
return 6;
}
else if ((int)ch == 62 && s == 5)
{
entry[getLength(entry)] = ch;
return 7;
}
else if ((int)ch == 38 && s == 5)
{
entry[getLength(entry)] = ch;
return 8;
}
else if (s == 5)
{
ifflag = false;
return 9;
}
else if ((int)ch == 42 && s == 0)
{
entry[getLength(entry)] = ch;
return 10;
}
else if ((int)ch == 61 && s == 10)
{
entry[getLength(entry)] = ch;
return 11;
}
else if (s == 10)
{
ifflag = false;
return 12;
}
else if ((int)ch == 40 && s == 0)
{
entry[getLength(entry)] = ch;
return 13;
}
else if ((int)ch == 61 && s == 13)
{
entry[getLength(entry)] = ch;
return 14;
}
else if (s == 13)
{
ifflag = false;
return 15;
}
else if ((int)ch == 37 && s == 0)
{
entry[getLength(entry)] = ch;
return 16;
}
else if ((int)ch == 61 && s == 16)
{
entry[getLength(entry)] = ch;
return 17;
}
else if (s == 16)
{
ifflag = false;
return 18;
}
else if ((int)ch == 33 && s == 0)
{
entry[getLength(entry)] = ch;
return 19;
}
else if ((int)ch == 61 && s == 19)
{
entry[getLength(entry)] = ch;
return 20;
}
else if (s == 19)
{
ifflag = false;
return 21;
}
else if ((int)ch == 38 && s == 0)
{
entry[getLength(entry)] = ch;
return 22;
}
else if ((int)ch == 38 && s == 22)
{
entry[getLength(entry)] = ch;
return 23;
}
else if (s == 22)
{
ifflag = false;
return 24;
}
else if ((int)ch == 124 && s == 0)
{
entry[getLength(entry)] = ch;
return 25;
}
else if ((int)ch == 124 && s == 25)
{
entry[getLength(entry)] = ch;
return 26;
}
else if (s == 25)
{
ifflag = false;
return 27;
}
else if ((int)ch == 61 && s == 0)
{
entry[getLength(entry)] = ch;
return 28;
}
else if ((int)ch == 61 && s == 28)
{
entry[getLength(entry)] = ch;
return 29;
}
else if (s == 28)
{
ifflag = false;
return 30;
}
else if ((int)ch == 62 && s == 0)
{
entry[getLength(entry)] = ch;
return 31;
}
else if ((int)ch == 61 && s == 31)
{
entry[getLength(entry)] = ch;
return 32;
}
else if (s == 31)
{
ifflag = false;
return 33;
}
else if ((int)ch == 60 && s == 0)
{
entry[getLength(entry)] = ch;
return 34;
}
else if ((int)ch == 61 && s == 34)
{
entry[getLength(entry)] = ch;
return 35;
}
else if (s == 34)
{
ifflag = false;
return 36;
}
else if (((int)ch == 34 || (int)ch == 40 || (int)ch == 41 || (int)ch == 44 || (int)ch == 39 || (int)ch == 58 || (int)ch == 91 || (int)ch == 93 || (int)ch == 94 || (int)ch == 123 || (int)ch == 125 || (int)ch == 126 || (int)ch==46 || (int)ch==59 || (int)ch == 92) && s==0)
{
entry[getLength(entry)] = ch;
return 37;
}
else if (((int)ch == 43 || (int)ch == 38) && s == 0)
{
entry[getLength(entry)] = ch;
return 38;
}
else if (((int)ch >= 48 && (int)ch <= 57) && s == 0)
{
entry[getLength(entry)] = ch;
return 39;
}
else if (!((int)ch >= 48 && (int)ch <= 57) && s == 38)
{
entry[getLength(entry) - 1] = '\0';
ifflag = false;
return 0;
}
else if (((int)ch >= 48 && (int)ch <= 57) && s == 38)
{
entry[getLength(entry)] = ch;
return 39;
}
else if (((int)ch >= 48 && (int)ch <= 57) && s == 39)
{
entry[getLength(entry)] = ch;
return 39;
}
else if ((int)ch == 39 && s == 39)
{
entry[getLength(entry)] = ch;
return 40;
}
else if ((int)ch == 101 && s == 39)
{
entry[getLength(entry)] = ch;
return 42;
}
else if (s == 39)
{
entry[getLength(entry)] = ch;
return 45;
}
else if (((int)ch >= 48 && (int)ch <= 57) && s == 40)
{
entry[getLength(entry)] = ch;
return 41;
}
else if (((int)ch >= 48 && (int)ch <= 57) && s == 41)
{
entry[getLength(entry)] = ch;
return 41;
}
else if ((int)ch == 101 && s == 41)
{
entry[getLength(entry)] = ch;
return 42;
}
else if (s == 41)
{
ifflag = false;
return 45;
}
else if (((int)ch == 43 || (int)ch == 38) && s == 42)
{
entry[getLength(entry)] = ch;
return 43;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 42)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 43)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (((int)ch >= 41 && (int)ch <= 57) && s == 44)
{
entry[getLength(entry)] = ch;
return 44;
}
else if (s == 44)
{
ifflag = false;
return 45;
}
else if (((int)ch == 95 || ((int)ch >= 65 && (int)ch <= 90) || ((int)ch >= 97 && (int)ch <= 122)) && s == 0)
{
entry[getLength(entry)] = ch;
return 46;
}
else if (((int)ch == 95 || ((int)ch >= 65 && (int)ch <= 90) || ((int)ch >= 97 && (int)ch <= 122) || ((int)ch >= 48 && (int)ch <= 57)) && s == 46)
{
entry[getLength(entry)] = ch;
return 46;
}
else if (s == 46)
{
ifflag = false;
return 47;
}
else if (s == 0 && ((int)ch == 0 || (int)ch == 7 || (int)ch == 8 || (int)ch == 9 || (int)ch == 10 || (int)ch == 11 || (int)ch == 12 || (int)ch == 13 || (int)ch == 34 || (int)ch == 39 || (int)ch == 63 || (int)ch == 92))
{
entry[getLength(entry)] = ch;
return 48;
}
else if (s == 0)
{
ifflag = false;
return 0;
}
}
int findEnd(int s)
{
int low = 0;
int high = 27;
while (low <= high)
{
int mid = (low + high) / 2;
if (s == sEnd[mid])
return sEnd[mid];
else
if (s > sEnd[mid])
low = mid + 1;
else
high = mid - 1;
}
return -1;
}
int getLength(char ch[])
{
int length = 0;
while (ch[length] != '\0')
length++;
return length;
}
这是一个进行简单词法分析的程序。本人设计一个bool型全局变量ifflag,表示是否使用当前从文件流读取的字符串ch,若ifflag值为true,则表示当前ch被使用过,下一次循环时执行ch=fgetc(cstream),否则重置ifflag的值为true。而move()函数中if的各个分支执行ifflag=false。 但是我在执行的过程中发现,程序在ifflag第一次被置false后值就不在发生改变。
用于被识别的简单程序如下:
void main()
{
int i,j,sum;
i=j=1;
sum=10;
sum=i+j;
wchar_t wch="汉字"
printf("%d\n",sum);
}