From e33e19d0587146859d48a134ec9fd94e7b7ba5cd Mon Sep 17 00:00:00 2001 From: "FWoltermann@gmail.com" Date: Thu, 8 Dec 2011 14:53:40 +0000 Subject: Initial upload --- Parser/Parser.cpp | 309 ++++++++++++++++++++++++++++++ Parser/Parser.h | 46 +++++ Parser/Reader.cpp | 114 ++++++++++++ Parser/Reader.h | 68 +++++++ Parser/Term.cpp | 122 ++++++++++++ Parser/Term.h | 172 +++++++++++++++++ Parser/Token.cpp | 546 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Parser/Token.h | 146 +++++++++++++++ Parser/stdafx.h | 1 + 9 files changed, 1524 insertions(+) create mode 100644 Parser/Parser.cpp create mode 100644 Parser/Parser.h create mode 100644 Parser/Reader.cpp create mode 100644 Parser/Reader.h create mode 100644 Parser/Term.cpp create mode 100644 Parser/Term.h create mode 100644 Parser/Token.cpp create mode 100644 Parser/Token.h create mode 100644 Parser/stdafx.h (limited to 'Parser') diff --git a/Parser/Parser.cpp b/Parser/Parser.cpp new file mode 100644 index 0000000..f8dea6f --- /dev/null +++ b/Parser/Parser.cpp @@ -0,0 +1,309 @@ +/* Project STARS + John DiCamillo Software Consulting + Copyright © 1997-2004. All Rights Reserved. + + SUBSYSTEM: parser + FILE: parser.cpp + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the generic Parser class +*/ + +#include "MemDebug.h" +#include "reader.h" +#include "token.h" +#include "parser.h" +#include +#include + +enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS }; + +void Print(const char* fmt, ...); + +static int dump_tokens = 0; + +// +-------------------------------------------------------------------+ + +Term* error(char* msg, const Token& token) +{ + static char buf[1024]; + sprintf(buf, " near '%s' in line %d.", (const char*) token.symbol(), token.line()); + + return error(msg, buf); +} + +// +-------------------------------------------------------------------+ + +Parser::Parser(Reader* r) +{ + reader = r ? r : new(__FILE__, __LINE__) ConsoleReader; + lexer = new(__FILE__, __LINE__) Scanner(reader); + + Token::addKey("true", KEY_TRUE); + Token::addKey("false", KEY_FALSE); + Token::addKey(":", KEY_DEF); + Token::addKey("-", KEY_MINUS); +} + +Parser::~Parser() +{ + delete lexer; + delete reader; + //Token::close(); +} + +Term* +Parser::ParseTerm() +{ + Term* t = ParseTermBase(); + if (t == 0) return t; + + Term* t2 = ParseTermRest(t); + + return t2; +} + +Term* +Parser::ParseTermRest(Term* base) +{ + Token t = lexer->Get(); + + switch (t.type()) { + default: + lexer->PutBack(); + return base; + + case Token::StringLiteral: { + // concatenate adjacent string literal tokens: + TermText* text = base->isText(); + if (text) { + TermText* base2 = new(__FILE__, __LINE__) TermText(text->value() + t.symbol()(1, t.symbol().length()-2)); + delete base; + return ParseTermRest(base2); + } + else { + lexer->PutBack(); + } + } + break; + + case Token::Keyword: + switch (t.key()) { + case KEY_DEF: + if (base->isText()) + return new(__FILE__, __LINE__) TermDef(base->isText(), ParseTerm()); + else + return error("(Parse) illegal lhs in def", t); + + default: + lexer->PutBack(); + return base; + } + break; + } + + return base; +} + +static int xtol(const char* p) +{ + int n = 0; + + while (*p) { + char digit = *p++; + n *= 16; + + if (digit >= '0' && digit <= '9') + n += digit - '0'; + + else if (digit >= 'a' && digit <= 'f') + n += digit - 'a' + 10; + + else if (digit >= 'A' && digit <= 'F') + n += digit - 'A' + 10; + } + + return n; +} + +Term* +Parser::ParseTermBase() +{ + Token t = lexer->Get(); + int n = 0; + double d = 0.0; + + switch (t.type()) { + case Token::IntLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + // handle hex notation: + if (nstr[1] == 'x') + n = xtol(nstr+2); + + else + n = atol(nstr); + + return new(__FILE__, __LINE__) TermNumber(n); + } + + case Token::FloatLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + d = atof(nstr); + return new(__FILE__, __LINE__) TermNumber(d); + } + + case Token::StringLiteral: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new(__FILE__, __LINE__) TermText(t.symbol()(1, t.symbol().length()-2)); + + case Token::AlphaIdent: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new(__FILE__, __LINE__) TermText(t.symbol()); + + case Token::Keyword: + if (dump_tokens) + Print("%s", t.symbol().data()); + + switch (t.key()) { + case KEY_FALSE: return new(__FILE__, __LINE__) TermBool(0); + case KEY_TRUE: return new(__FILE__, __LINE__) TermBool(1); + + case KEY_MINUS: { + Token next = lexer->Get(); + if (next.type() == Token::IntLiteral) { + if (dump_tokens) + Print("%s", next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + n = -1 * atol(nstr); + return new(__FILE__, __LINE__) TermNumber(n); + } + else if (next.type() == Token::FloatLiteral) { + if (dump_tokens) + Print("%s", next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + d = -1.0 * atof(nstr); + return new(__FILE__, __LINE__) TermNumber(d); + } + else { + lexer->PutBack(); + return error("(Parse) illegal token '-': number expected", next); + } + } + break; + + default: + lexer->PutBack(); + return 0; + } + + case Token::LParen: return ParseArray(); + + case Token::LBrace: return ParseStruct(); + + case Token::CharLiteral: + return error("(Parse) illegal token ", t); + + default: + lexer->PutBack(); + return 0; + } +} + +TermArray* +Parser::ParseArray() +{ + TermList* elems = ParseTermList(0); + Token end = lexer->Get(); + + if (end.type() != Token::RParen) + return (TermArray*) error("(Parse) ')' missing in array-decl", end); + + return new(__FILE__, __LINE__) TermArray(elems); +} + +TermStruct* +Parser::ParseStruct() +{ + TermList* elems = ParseTermList(1); + Token end = lexer->Get(); + + if (end.type() != Token::RBrace) + return (TermStruct*) error("(Parse) '}' missing in struct", end); + + return new(__FILE__, __LINE__) TermStruct(elems); +} + +TermList* +Parser::ParseTermList(int for_struct) +{ + TermList* tlist = new(__FILE__, __LINE__) TermList; + + Term* term = ParseTerm(); + while (term) { + if (for_struct && !term->isDef()) { + return (TermList*) error("(Parse) non-definition term in struct"); + } + else if (!for_struct && term->isDef()) { + return (TermList*) error("(Parse) illegal definition in array"); + } + + tlist->append(term); + Token t = lexer->Get(); + + /*** OLD WAY: COMMA SEPARATORS REQUIRED *** + if (t.type() != Token::Comma) { + lexer->PutBack(); + term = 0; + } + else + term = ParseTerm(); + /*******************************************/ + + // NEW WAY: COMMA SEPARATORS OPTIONAL: + if (t.type() != Token::Comma) { + lexer->PutBack(); + } + + term = ParseTerm(); + } + + return tlist; +} + + + diff --git a/Parser/Parser.h b/Parser/Parser.h new file mode 100644 index 0000000..652c975 --- /dev/null +++ b/Parser/Parser.h @@ -0,0 +1,46 @@ +/* Project nGen + John DiCamillo Software Consulting + Copyright © 1997-2004. All Rights Reserved. + + SUBSYSTEM: parser + FILE: parser.h + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the generic Parser class +*/ + +#ifndef PARSER_H +#define PARSER_H + +#include "text.h" +#include "term.h" + +// +-------------------------------------------------------------------+ + +class Reader; +class Scanner; + +// +-------------------------------------------------------------------+ + +class Parser +{ +public: + Parser(Reader* r = 0); + ~Parser(); + + Term* ParseTerm(); + Term* ParseTermBase(); + Term* ParseTermRest(Term* base); + TermList* ParseTermList(int for_struct); + TermArray* ParseArray(); + TermStruct* ParseStruct(); + +private: + Reader* reader; + Scanner* lexer; +}; + +#endif diff --git a/Parser/Reader.cpp b/Parser/Reader.cpp new file mode 100644 index 0000000..14b1126 --- /dev/null +++ b/Parser/Reader.cpp @@ -0,0 +1,114 @@ +/* Project STARS + John DiCamillo Software Consulting + Copyright © 1997-2000. All Rights Reserved. + + SUBSYSTEM: obelisk + FILE: reader.cpp + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the Reader class +*/ + +#include "MemDebug.h" +#include "reader.h" +#include +#include +#include + + +void Print(const char* fmt, ...); + +// +-------------------------------------------------------------------+ + +Text +ConsoleReader::more() +{ + // loop until the user types something + do { + printPrimaryPrompt(); + fillInputBuffer(); + } while (! *p); + + return Text(p); +} + +void +ConsoleReader::printPrimaryPrompt() +{ + printf("- "); +} + +void +ConsoleReader::fillInputBuffer() +{ + fgets(buffer, 980, stdin); + p = buffer; + while (isspace(*p)) p++; +} + +// +-------------------------------------------------------------------+ + +FileReader::FileReader(const char* fname) + : filename(fname), done(0) +{ } + +Text +FileReader::more() +{ + if (done) return Text(); + + ifstream fin; + fin.open(filename, ios::in | ios::nocreate); + + if (!fin) { + Print("ERROR(Parse): Could not open file '%s'\n", filename); + return Text(); + } + + Text result; + char buf[1000], newline; + + while (fin.get(buf, 1000)) { + result.append(buf); + fin.get(newline); + result.append(newline); + } + + done = 1; + return result; +} + +// +-------------------------------------------------------------------+ + +BlockReader::BlockReader(const char* block) + : data((char*) block), done(0), length(0) +{ } + +BlockReader::BlockReader(const char* block, int len) + : data((char*) block), done(0), length(len) +{ } + +Text +BlockReader::more() +{ + if (done) return Text(); + + if (length) { + Text result(data, length); + done = 1; + return result; + } + else if (data) { + Text result(data); + done = 1; + return result; + } + + done = 1; + return Text(); +} + + diff --git a/Parser/Reader.h b/Parser/Reader.h new file mode 100644 index 0000000..4af6992 --- /dev/null +++ b/Parser/Reader.h @@ -0,0 +1,68 @@ +/* Project nGen + John DiCamillo Software Consulting + Copyright © 1997-2000. All Rights Reserved. + + SUBSYSTEM: obelisk + FILE: reader.h + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the Reader class +*/ + +#ifndef READER_H +#define READER_H + +#include "text.h" + +// +-------------------------------------------------------------------+ + +class Reader +{ +public: + Reader() { } + virtual ~Reader() { } + + virtual Text more() = 0; +}; + +class ConsoleReader : public Reader +{ +public: + virtual Text more(); + + void printPrimaryPrompt(); + void fillInputBuffer(); + +private: + char buffer[1000]; + char* p; +}; + +class FileReader : public Reader +{ +public: + FileReader(const char* fname); + virtual Text more(); + +private: + Text filename; + int done; +}; + +class BlockReader : public Reader +{ +public: + BlockReader(const char* block); + BlockReader(const char* block, int len); + virtual Text more(); + +private: + char* data; + int done; + int length; +}; + +#endif diff --git a/Parser/Term.cpp b/Parser/Term.cpp new file mode 100644 index 0000000..5c5f9d4 --- /dev/null +++ b/Parser/Term.cpp @@ -0,0 +1,122 @@ +/* Project STARS + John DiCamillo Software Consulting + Copyright © 1997-2000. All Rights Reserved. + + SUBSYSTEM: Stars + FILE: Term.cpp + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the Term class +*/ + + +#include "MemDebug.h" +#include "Term.h" + +void Print(const char* fmt, ...); + +// +-------------------------------------------------------------------+ + +Term* +error(char* s1, char* s2) +{ + Print("ERROR: "); + if (s1) Print(s1); + if (s2) Print(s2); + Print("\n\n"); + return 0; +} + +// +-------------------------------------------------------------------+ + +void TermBool::print(int level) { if (level > 0) Print(val? "true" : "false"); else Print("..."); } +void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); } +void TermText::print(int level) { if (level > 0) Print("\"%s\"", val); else Print("..."); } + +// +-------------------------------------------------------------------+ + +TermArray::TermArray(TermList* elist) +{ + elems = elist; +} + +TermArray::~TermArray() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermArray::print(int level) +{ + if (level > 1) { + Print("("); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print(") "); + } + else Print("(...) "); +} + +// +-------------------------------------------------------------------+ + +TermStruct::TermStruct(TermList* elist) +{ + elems = elist; +} + +TermStruct::~TermStruct() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermStruct::print(int level) +{ + if (level > 1) { + Print("{"); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print("} "); + } + else Print("{...} "); +} + +// +-------------------------------------------------------------------+ + +TermDef::~TermDef() +{ + delete mname; + delete mval; +} + +void +TermDef::print(int level) +{ + if (level >= 0) { + mname->print(level); + Print(": "); + mval->print(level-1); + } + else Print("..."); +} + +// +-------------------------------------------------------------------+ diff --git a/Parser/Term.h b/Parser/Term.h new file mode 100644 index 0000000..8597b97 --- /dev/null +++ b/Parser/Term.h @@ -0,0 +1,172 @@ +/* Project STARS + John DiCamillo Software Consulting + Copyright © 1997-2000. All Rights Reserved. + + SUBSYSTEM: Stars + FILE: term.h + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the Abstract Syntax Tree classes +*/ + + +#ifndef TERM_H +#define TERM_H + +#include "Text.h" +#include "List.h" + +// +-------------------------------------------------------------------+ + +class Term; +class TermBool; +class TermNumber; +class TermText; +class TermArray; +class TermDef; +class TermStruct; + +// +-------------------------------------------------------------------+ + +class Term +{ +public: + static const char* TYPENAME() { return "Term"; } + + Term() { } + virtual ~Term() { } + + virtual int operator==(const Term& rhs) const { return 0; } + + virtual void print(int level=10) { } + + // conversion tests + virtual Term* touch() { return this; } + virtual TermBool* isBool() { return 0; } + virtual TermNumber* isNumber() { return 0; } + virtual TermText* isText() { return 0; } + virtual TermArray* isArray() { return 0; } + virtual TermDef* isDef() { return 0; } + virtual TermStruct* isStruct() { return 0; } +}; + +Term* error(char*, char* = 0); + +// +-------------------------------------------------------------------+ + +typedef List TermList; +typedef ListIter TermListIter; + +// +-------------------------------------------------------------------+ + +class TermBool : public Term +{ +public: + static const char* TYPENAME() { return "TermBool"; } + + TermBool(bool v) : val(v) { } + + virtual void print(int level=10); + virtual TermBool* isBool() { return this; } + bool value() const { return val; } + +private: + bool val; +}; + +// +-------------------------------------------------------------------+ + +class TermNumber : public Term +{ +public: + static const char* TYPENAME() { return "TermNumber"; } + + TermNumber(double v) : val(v) { } + + virtual void print(int level=10); + virtual TermNumber* isNumber() { return this; } + double value() const { return val; } + +private: + double val; +}; + +// +-------------------------------------------------------------------+ + +class TermText : public Term +{ +public: + static const char* TYPENAME() { return "TermText"; } + + TermText(const Text& v) : val(v) { } + + virtual void print(int level=10); + virtual TermText* isText() { return this; } + Text value() const { return val; } + +private: + Text val; +}; + +// +-------------------------------------------------------------------+ + +class TermArray : public Term +{ +public: + static const char* TYPENAME() { return "TermArray"; } + + TermArray(TermList* elist); + virtual ~TermArray(); + + virtual void print(int level=10); + virtual TermArray* isArray() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermStruct : public Term +{ +public: + static const char* TYPENAME() { return "TermStruct"; } + + TermStruct(TermList* elist); + virtual ~TermStruct(); + + virtual void print(int level=10); + + virtual TermStruct* isStruct() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermDef : public Term +{ +public: + static const char* TYPENAME() { return "TermDef"; } + + TermDef(TermText* n, Term* v) : mname(n), mval(v) { } + virtual ~TermDef(); + + virtual void print(int level=10); + virtual TermDef* isDef() { return this; } + + virtual TermText* name() { return mname; } + virtual Term* term() { return mval; } + +private: + TermText* mname; + Term* mval; +}; + +#endif diff --git a/Parser/Token.cpp b/Parser/Token.cpp new file mode 100644 index 0000000..a7470ee --- /dev/null +++ b/Parser/Token.cpp @@ -0,0 +1,546 @@ +/* Project STARS + John DiCamillo Software Consulting + Copyright © 1997-2000. All Rights Reserved. + + SUBSYSTEM: Stars + FILE: token.cpp + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class implementation +*/ + +#include "MemDebug.h" +#include "Token.h" +#include "Reader.h" +#include "Text.h" + +#include + +// +-------------------------------------------------------------------+ + +bool Token::hidecom = true; +char Token::combeg[3] = "//"; +char Token::comend[3] = "\n"; +char Token::altbeg[3] = "/*"; +char Token::altend[3] = "*/"; +Dictionary Token::keymap; + +// +-------------------------------------------------------------------+ + +Token::Token() + : mType(Undefined), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const Token& rhs) + : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn) +{ + mLength = rhs.mLength; + if (mLength < 8) { + strcpy(mSymbol, rhs.mSymbol); + } + else { + mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } +} + +Token::Token(int t) + : mType(t), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const char* s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = strlen(s); + if (mLength < 8) { + strcpy(mSymbol, s); + } + else { + mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1]; + strcpy(mFullSymbol, s); + } +} + +Token::Token(const Text& s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = s.length(); + if (mLength < 8) { + strcpy(mSymbol, s.data()); + } + else { + mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1]; + strcpy(mFullSymbol, s.data()); + } +} + +Token::~Token() +{ + if (mLength >= 8) + delete [] mFullSymbol; +} + +// +-------------------------------------------------------------------+ + +void +Token::close() +{ + keymap.clear(); +} + +// +-------------------------------------------------------------------+ + +Token& +Token::operator = (const Token& rhs) +{ + if (mLength >= 8) + delete [] mFullSymbol; + + mLength = rhs.mLength; + if (mLength < 8) { + strcpy(mSymbol, rhs.mSymbol); + } + else { + mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } + + mType = rhs.mType; + mKey = rhs.mKey; + mLine = rhs.mLine; + mColumn = rhs.mColumn; + + return *this; +} + +// +-------------------------------------------------------------------+ + +bool +Token::match(const Token& ref) const +{ + if (mType == ref.mType) { // if types match + if (ref.mLength == 0) // if no symbol to match + return true; // match! + + else if (mLength == ref.mLength) { // else if symbols match + if (mLength < 8) { + if (!strcmp(mSymbol, ref.mSymbol)) + return true; // match! + } + else { + if (!strcmp(mFullSymbol, ref.mFullSymbol)) + return true; // match! + } + } + } + + return false; +} + +// +-------------------------------------------------------------------+ + +Text +Token::symbol() const +{ + if (mLength < 8) + return Text(mSymbol); + else + return Text(mFullSymbol); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKey(const Text& k, int v) +{ + keymap.insert(k, v); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKeys(Dictionary& keys) +{ + DictionaryIter iter = keys; + while (++iter) + keymap.insert(iter.key(), iter.value()); +} + +// +-------------------------------------------------------------------+ + +bool +Token::findKey(const Text& k, int& v) +{ + if (keymap.contains(k)) { + v = keymap.find(k); + return true; + } + else + return false; +} + +// +-------------------------------------------------------------------+ + +void +Token::comments(const Text& begin, const Text& end) +{ + combeg[0] = begin(0); + if (begin.length() > 1) combeg[1] = begin(1); + else combeg[1] = '\0'; + + comend[0] = end(0); + if (end.length() > 1) comend[1] = end(1); + else comend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +void +Token::altComments(const Text& begin, const Text& end) +{ + altbeg[0] = begin(0); + if (begin.length() > 1) altbeg[1] = begin(1); + else altbeg[1] = '\0'; + + altend[0] = end(0); + if (end.length() > 1) altend[1] = end(1); + else altend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +Text +Token::typestr() const +{ + Text t = "Unknown"; + switch (type()) { + case Undefined: t = "Undefined"; break; + case Keyword: t = "Keyword"; break; + case AlphaIdent: t = "AlphaIdent"; break; + case SymbolicIdent: t = "SymbolicIdent"; break; + case Comment: t = "Comment"; break; + case IntLiteral: t = "IntLiteral"; break; + case FloatLiteral: t = "FloatLiteral"; break; + case StringLiteral: t = "StringLiteral"; break; + case CharLiteral: t = "CharLiteral"; break; + case Dot: t = "Dot"; break; + case Comma: t = "Comma"; break; + case Colon: t = "Colon"; break; + case Semicolon: t = "Semicolon"; break; + case LParen: t = "LParen"; break; + case RParen: t = "RParen"; break; + case LBracket: t = "LBracket"; break; + case RBracket: t = "RBracket"; break; + case LBrace: t = "LBrace"; break; + case RBrace: t = "RBrace"; break; + case EOT: t = "EOT"; break; + case LastTokenType: t = "LastTokenType"; break; + } + + return t; +} + +// +-------------------------------------------------------------------+ + +Text +Token::describe(const Text& tok) +{ + Text d; + + switch (tok(0)) { + case '.' : d = "Token::Dot"; break; + case ',' : d = "Token::Comma"; break; + case ';' : d = "Token::Semicolon"; break; + case '(' : d = "Token::LParen"; break; + case ')' : d = "Token::RParen"; break; + case '[' : d = "Token::LBracket"; break; + case ']' : d = "Token::RBracket"; break; + case '{' : d = "Token::LBrace"; break; + case '}' : d = "Token::RBrace"; break; + default : break; + } + + if (d.length() == 0) { + if (isalpha(tok(0))) + d = "\"" + tok + "\", Token::AlphaIdent"; + else if (isdigit(tok(0))) { + if (tok.contains(".")) + d = "\"" + tok + "\", Token::FloatLiteral"; + else + d = "\"" + tok + "\", Token::IntLiteral"; + } + else + d = "\"" + tok + "\", Token::SymbolicIdent"; + } + + return d; +} + +// +-------------------------------------------------------------------+ + +Scanner::Scanner(Reader* r) + : reader(r), str(0), index(0), old_index(0), + length(0), line(0), old_line(0), lineStart(0) +{ } + +Scanner::Scanner(const Scanner& rhs) + : index(rhs.index), old_index(rhs.old_index), length(rhs.length), + reader(rhs.reader), + line(rhs.line), old_line(0), lineStart(rhs.lineStart) +{ + str = new(__FILE__, __LINE__) char [strlen(rhs.str) + 1]; + strcpy(str, rhs.str); +} + +Scanner::Scanner(const Text& s) + : reader(0), index(0), old_index(0), length(s.length()), line(0), + old_line(0), lineStart(0) +{ + str = new(__FILE__, __LINE__) char [s.length() + 1]; + strcpy(str, s.data()); +} + +Scanner::~Scanner() +{ + delete [] str; +} + +// +-------------------------------------------------------------------+ + +Scanner& +Scanner::operator = (const Scanner& rhs) +{ + delete [] str; + str = new(__FILE__, __LINE__) char [strlen(rhs.str) + 1]; + strcpy(str, rhs.str); + + index = rhs.index; + old_index = rhs.old_index; + length = rhs.length; + line = rhs.line; + old_line = rhs.old_line; + lineStart = rhs.lineStart; + + return *this; +} + +// +-------------------------------------------------------------------+ + +void +Scanner::Load(const Text& s) +{ + delete [] str; + str = new(__FILE__, __LINE__) char [s.length() + 1]; + strcpy(str, s.data()); + + index = 0; + old_index = 0; + best = Token(); + length = s.length(); + line = 0; + old_line = 0; + lineStart = 0; +} + +// +-------------------------------------------------------------------+ + +Token +Scanner::Get(Need need) +{ + int type = Token::EOT; + old_index = index; + old_line = line; + + eos = str + length; + p = str + index; + + if (p >= eos) { + if (need == Demand && reader) { + Load(reader->more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + while (isspace(*p) && p < eos) { // skip initial white space + if (*p == '\n') { + line++; + lineStart = p - str; + } + p++; + } + + if (p >= eos) { + if (need == Demand && reader) { + Load(reader->more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + Token result; + size_t start = p - str; + + if (*p == '"' || *p == '\'') { // special case for quoted tokens + + if (*p == '"') type = Token::StringLiteral; + else type = Token::CharLiteral; + + char match = *p; + while (++p < eos) { + if (*p == match) { // find matching quote + if (*(p-1) != '\\') { // if not escaped + p++; // token includes matching quote + break; + } + } + } + } + + // generic delimited comments + else if (*p == Token::comBeg(0) && + (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::comEnd(0) && + (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) { + p++; if (Token::comEnd(1)) p++; + break; + } + } + } + + // alternate form delimited comments + else if (*p == Token::altBeg(0) && + (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::altEnd(0) && + (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) { + p++; if (Token::altEnd(1)) p++; + break; + } + } + } + + else if (*p == '.') type = Token::Dot; + else if (*p == ',') type = Token::Comma; + else if (*p == ';') type = Token::Semicolon; + else if (*p == '(') type = Token::LParen; + else if (*p == ')') type = Token::RParen; + else if (*p == '[') type = Token::LBracket; + else if (*p == ']') type = Token::RBracket; + else if (*p == '{') type = Token::LBrace; + else if (*p == '}') type = Token::RBrace; + + // use lexical sub-parser for ints and floats + else if (isdigit(*p)) + type = GetNumeric(); + + else if (IsSymbolic(*p)) { + type = Token::SymbolicIdent; + while (IsSymbolic(*p)) p++; + } + + else { + type = Token::AlphaIdent; + while (IsAlpha(*p)) p++; + } + + size_t extent = (p - str) - start; + + if (extent < 1) extent = 1; // always get at least one character + + index = start + extent; // advance the cursor + int col = start - lineStart; + if (line == 0) col++; + + char* buf = new(__FILE__, __LINE__) char [extent + 1]; + strncpy(buf, str + start, extent); + buf[extent] = '\0'; + + if (type == Token::Comment && Token::hidecom) { + delete [] buf; + if (Token::comEnd(0) == '\n') { + line++; + lineStart = p - str; + } + return Get(need); + } + + if (type == Token::AlphaIdent || // check for keyword + type == Token::SymbolicIdent) { + int val; + if (Token::findKey(Text(buf), val)) + result = Token(buf, Token::Keyword, val, line+1, col); + } + + if (result.mType != Token::Keyword) + result = Token(buf, type, 0, line+1, col); + + if (line+1 > (size_t) best.mLine || + (line+1 == (size_t) best.mLine && col > best.mColumn)) + best = result; + + delete [] buf; + return result; +} + +// +-------------------------------------------------------------------+ + +int +Scanner::GetNumeric() +{ + int type = Token::IntLiteral; // assume int + + if (*p == '0' && *(p+1) == 'x') { // check for hex: + p += 2; + while (isxdigit(*p)) p++; + return type; + } + + while (isdigit(*p) || *p == '_') p++; // whole number part + + if (*p == '.') { p++; // optional fract part + type = Token::FloatLiteral; // implies float + + while (isdigit(*p) || *p == '_') p++; // fractional part + } + + if (*p == 'E' || *p == 'e') { p++; // optional exponent + if (*p == '+' || *p == '-') p++; // which may be signed + while (isdigit(*p)) p++; + + type = Token::FloatLiteral; // implies float + } + + return type; +} + +// +-------------------------------------------------------------------+ + +bool +Scanner::IsAlpha(char c) +{ + return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false; +} + +// +-------------------------------------------------------------------+ + +bool +Scanner::IsSymbolic(char c) +{ + const char* s = "+-*/\\<=>~!@#$%^&|:"; + return strchr(s, c)?true:false; +} diff --git a/Parser/Token.h b/Parser/Token.h new file mode 100644 index 0000000..870fa66 --- /dev/null +++ b/Parser/Token.h @@ -0,0 +1,146 @@ +/* Project STARSHATTER + John DiCamillo + Copyright © 1997-2001. All Rights Reserved. + + SUBSYSTEM: Parser + FILE: Token.h + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class definition +*/ + +#ifndef Token_h +#define Token_h + +#include "Text.h" +#include "Dictionary.h" + +#pragma warning( disable : 4237) + +// +-------------------------------------------------------------------+ + +class Reader; +class Token; +class Scanner; + +// +-------------------------------------------------------------------+ + +class Token +{ + friend class Scanner; + +public: + // keywords must be alphanumeric identifiers or symbolic identifiers + enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment, + IntLiteral, FloatLiteral, StringLiteral, CharLiteral, + Dot, Comma, Colon, Semicolon, + LParen, RParen, LBracket, RBracket, LBrace, RBrace, + EOT, LastTokenType }; + + enum Alias { CompoundSeparator = Dot, + ItemSeparator = Comma, + StatementTerminator = Semicolon, + TypeIndicator = Colon, + Lambda = LastTokenType + 1 }; + + Token(); + Token(const Token& rhs); + Token(int t); + Token(const char* s, int t, int k=0, int l=0, int c=0); + Token(const Text& s, int t, int k=0, int l=0, int c=0); + ~Token(); + + Token& operator = (const Token& rhs); + + bool match(const Token& ref) const; + + Text symbol() const; + int type() const { return mType; } + int key() const { return mKey; } + int line() const { return mLine; } + int column() const { return mColumn; } + + Text typestr() const; + + static Text describe(const Text& tok); + static void addKey(const Text& k, int v); + static void addKeys(Dictionary& keys); + static bool findKey(const Text& k, int& v); + static void comments(const Text& begin, const Text& end); + static void altComments(const Text& begin, const Text& end); + static void hideComments(bool hide = true) { hidecom = hide; } + + static char comBeg(unsigned int i) { return combeg[i]; } + static char comEnd(unsigned int i) { return comend[i]; } + static char altBeg(unsigned int i) { return altbeg[i]; } + static char altEnd(unsigned int i) { return altend[i]; } + + static void close(); + +protected: + int mLength; + union { + char mSymbol[8]; + char* mFullSymbol; + }; + int mType; + int mKey; + int mLine; + int mColumn; + + static bool hidecom; + static char combeg[3]; + static char comend[3]; + static char altbeg[3]; + static char altend[3]; + + static Dictionary keymap; +}; + +// +-------------------------------------------------------------------+ + +class Scanner +{ +public: + Scanner(Reader* r = 0); + Scanner(const Text& s); + Scanner(const Scanner& rhs); + virtual ~Scanner(); + + Scanner& operator = (const Scanner& rhs); + + void Load(const Text& s); + + enum Need { Demand, Request }; + virtual Token Get(Need n = Demand); + + void PutBack() { index = old_index; line = old_line; } + int GetCursor() { return index; } + int GetLine() { return line; } + void Reset(int c, int l) { index = old_index = c; line = old_line = l; } + Token Best() const { return best; } + +protected: + virtual int GetNumeric(); + virtual bool IsSymbolic(char c); + virtual bool IsAlpha(char c); + + Reader* reader; + char* str; + + const char* p; + const char* eos; + + size_t index; + size_t old_index; + Token best; + size_t length; + size_t line; + size_t old_line; + size_t lineStart; +}; + +#endif // TOKEN_H diff --git a/Parser/stdafx.h b/Parser/stdafx.h new file mode 100644 index 0000000..071c3ed --- /dev/null +++ b/Parser/stdafx.h @@ -0,0 +1 @@ +#include "MemDebug.h" \ No newline at end of file -- cgit v1.1