#include "tokeniser.h"
#include <ctype.h>
// to shorten the code
using namespace std ;
namespace Tokeniser
{
// parse end of input
// produces the eoi token and locks the tokeniser into the end-of-input state:
// ch is forced to EOF so every later call will also take the eoi path
static Token parse_eoi()
{
    // after an error the real input may not actually be exhausted - fake it
    ch = EOF;
    // build the eoi token with an empty spelling and hand it back
    Token eoi = new_token(tk_eoi, "");
    return eoi;
}
// parse a single character that cannot begin any other kind of token
// the token's spelling is exactly that one character
static Token parse_byte()
{
    // capture the current character before advancing past it
    string spelling;
    spelling += static_cast<char>(ch);
    // the tokeniser always reads one character past the end of each token
    nextch();
    return new_token(tk_byte, spelling);
}
// ****** ADD NEW CODE HERE ******
// add your own parse_*() functions here ...
// return the next Token object by reading more of the input
// this function inspects the current character, ch, to decide which grammar
// rule can start here, then delegates to the matching parse_*() function
// which completes and returns the Token object
Token next_token()
{
    // ****** ADD NEW CODE HERE ******
    // add tests and parse_*() calls for each extra token kind here ...
    // ch holds exactly one character, so match it against ranges,
    // eg ('0' <= ch && ch <= '9') for a digit; a multi-character token
    // such as "<=" must be recognised one character at a time

    // end of input
    if (ch == EOF) return parse_eoi();

    // anything else is a single character that cannot start another token
    return parse_byte();
}
}
#include <string>
// Interface for the workshop tokeniser
namespace Tokeniser
{
    // shorthand name for the string type
    using string = std::string;

    // the ID used to reference a Token - the implementation is hidden
    using Token = int;

    // the kinds of token that are recognised
    enum TokenKind
    {
        // *** BNF syntax used by the token grammar rules
        // * literals inside ' '
        // * grouping ( )
        // * ASCII ranges -
        // * alternatives |
        // * 0 or 1 ?
        // * 0 or more *
        // * 1 or more +
        //
        // Tokeniser Tokens
        // Grammar rule: Definition
        // input: token* eoi
        // token: byte | stage1 | stage2 | stage3 | stage4 | stage5
        // byte: any single character that cannot start another token
        // stage1: ?? see below ??
        // stage2: ?? see below ??
        // stage3: ?? see below ??
        // stage4: ?? see below ??
        // stage5: ?? see below ??
        // eoi: the end of the input or any unexpected character
        //
        // * all input after returning an eoi token is ignored
        // * when parsing a token, if the token cannot be completed return the eoi token
        // * the web submission system will reveal the grammar rule(s) for each stage
        //   once any test of the previous stage has been passed
        //   ie the stage1 rules will be revealed by the first successful web submission of the start up files,
        //   the stage2 rules will be revealed once any of the stage1 tests is passed,
        //   the stage3 rules will be revealed once any of the stage2 tests is passed, and so on
        tk_eoi,        // end of input reached or an unexpected character was found
        tk_byte,       // any single byte character that cannot start another token

        // details of the following token kinds will be revealed by the web submission system
        // this will include the grammar rules and, if required, details of any spelling changes to make
        // not all of these token kinds may be used
        tk_number,     // some form of number
        tk_identifier, // some form of identifier
        tk_operator,   // some form of operator
        tk_comment,    // some form of comment
        tk_string,     // some form of string
        tk_arbitrary,  // some form of arbitrary token
        tk_weird,      // some form of weird token
        tk_last        // sentinel - ignore this
    };

    // ***** the following are implemented in library.cpp *****

    // create a new Token object and return its ID
    extern Token new_token(TokenKind kind,string spelling) ;

    // return the TokenKind for the given token
    extern TokenKind token_kind(Token token) ;

    // return the characters that make up the given token
    extern string token_spelling(Token token) ;

    // return a string representation of the given token
    extern string token_to_string(Token token) ;

    // the current input character, initialised by the first call of nextch()
    // it is type int rather than char so it does not get confused with character code 255
    extern int ch ;

    // read the next character if not at the end of input
    extern void nextch() ;

    // ***** the practical involves implementing the following function by completing tokeniser.cpp *****

    // read the next token from standard input
    extern Token next_token() ;
}
#endif