// a skeleton implementation of a tokeniser
#include "tokeniser.h"
#include <iostream>
#include <ctype.h>
// to shorten the code
using namespace std ;
////////////////////////////////////////////////////////////////////////
namespace Assignment_Tokeniser
{
// report whether a token has the given kind or belongs to the given grouping
// token: the token to classify, its kind is obtained with token_kind()
// kind_or_grouping: either a single token kind or a grouping of kinds
// returns true on an exact kind match
// no groupings are recognised yet, so every other combination returns false
bool token_is_in(Token token,TokenKind kind_or_grouping)
{
    // an exact match needs no grouping lookup
    if ( token_kind(token) == kind_or_grouping ) return true ;

    // grouping membership: add one case per grouping that tests the token's kind
    switch(kind_or_grouping)
    {
    default:
        break ;
    }

    // not the same kind and not a member of any known grouping
    return false ;
}
// tokeniser state that is private to this file
namespace
{
    // the most recently read input character, it starts as ' ' which is skipped like any other space
    // it is an int so that the EOF marker can never be mistaken for a legal character
    int ch = ' ' ;

    // the line number and column of ch, reading starts at line 1 column 0
    int line_num = 1 ;
    int column_num = 0 ;

    // the line number and column of the first character of the current token
    int start_line = 0 ;
    int start_column = 0 ;
}
// build the context string that shows where a token came from
// the intended result has three parts:
//   the input line before the token,
//   the input line that contains the token, and
//   a line holding a ^ that marks the token's position
// tab stops are every 8 characters, so each tab is meant to become 1 to 8 spaces
// and the character after it starts on an 8 character boundary
// not implemented yet: every token currently gets an empty context string
string token_context(Token token)
{
    string context ;        // stays empty until the context is implemented
    return context ;
}
// advance ch to the next input character and keep the line and column numbers up to date
// does nothing once end of input has been seen, so ch stays at EOF
// the line number only moves on when the character being replaced is a newline,
// so a newline is counted as the last character of its own line
static void nextch()
{
    // never read past the end of input
    if ( ch == EOF ) return ;

    // the character being replaced ended a line, so the next one starts a new line
    if ( ch == '\n' )
    {
        line_num++ ;        // increment line number
        column_num = 0 ;    // reset column number
    }

    ch = getchar() ;        // read the next character from stdin, this yields EOF at end of input
    column_num++ ;          // the new character is one column further along, EOF is counted too
}
////////////////////////////////////////////////////////////////////////
// produce the end of input token
// used both for a real end of input and when a bad token is found
// the token is created with an empty string and the position start_line, start_column
Token parse_eoi()
{
    // force ch to EOF so that after a bad token nextch() reads no further input
    ch = EOF ;

    // build the eoi token at the position recorded for the current token
    Token eoi = new_token(tk_eoi,"",start_line,start_column) ;
    return eoi ;
}
// return the next token object by reading more of the input
// all input must be read with nextch(), the last character read is in the static variable ch
// always read one character past the end of the token being returned
// spaces, tabs, carriage returns and newlines before a token are ignored
// at present no character can start a token, so the result is always an EOI token
Token next_token()
{
    // skip the characters that separate tokens
    // EOF is not one of them, so this also stops at end of input
    while ( ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ) nextch() ;

    // ch is now the first character of the next token or EOF, remember its position
    start_line = line_num ;
    start_column = column_num ;

    switch(ch)                  // ch is always the next char to read
    {
    // add additional case labels here for characters that can start tokens
    // call a parse_* function to complete and return each kind of token
    case EOF:                   // end of input
    default:                    // the next character cannot start a token
        return parse_eoi() ;    // either way the result is an EOI token
    }
}
}
在这段代码中,添加 identifier 和 integer 这两种 token。
要求:
1.所有输入都必须用nextch()函数。
2.如果达到输入结束,则return tk_eoi()
3.如果发现一个字符既不能作为token的一部分,也不是space“ ”、tab“\t”、carriage return“\r”或newline“\n”,则return tk_eoi()
4.所有token必须是输入中的连续字符
5.搜索下一个token的开始时,所有space、tab,carriage return和newline都将被忽略。