From 94ef3b0248485714ca8e635af3811d788ee930e2 Mon Sep 17 00:00:00 2001 From: Aki Date: Sat, 2 Apr 2022 19:19:08 +0200 Subject: Moved def format implementation to own module --- CMakeLists.txt | 1 + DefinitionEx/CMakeLists.txt | 15 ++ DefinitionEx/Parser.cpp | 307 +++++++++++++++++++++++++ DefinitionEx/Parser.h | 45 ++++ DefinitionEx/Term.cpp | 119 ++++++++++ DefinitionEx/Term.h | 171 ++++++++++++++ DefinitionEx/Token.cpp | 544 ++++++++++++++++++++++++++++++++++++++++++++ DefinitionEx/Token.h | 145 ++++++++++++ StarsEx/CMakeLists.txt | 4 +- StarsEx/Parser.cpp | 307 ------------------------- StarsEx/Parser.h | 45 ---- StarsEx/Term.cpp | 119 ---------- StarsEx/Term.h | 171 -------------- StarsEx/Token.cpp | 544 -------------------------------------------- StarsEx/Token.h | 145 ------------ 15 files changed, 1348 insertions(+), 1334 deletions(-) create mode 100644 DefinitionEx/CMakeLists.txt create mode 100644 DefinitionEx/Parser.cpp create mode 100644 DefinitionEx/Parser.h create mode 100644 DefinitionEx/Term.cpp create mode 100644 DefinitionEx/Term.h create mode 100644 DefinitionEx/Token.cpp create mode 100644 DefinitionEx/Token.h delete mode 100644 StarsEx/Parser.cpp delete mode 100644 StarsEx/Parser.h delete mode 100644 StarsEx/Term.cpp delete mode 100644 StarsEx/Term.h delete mode 100644 StarsEx/Token.cpp delete mode 100644 StarsEx/Token.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d89f0a..8622974 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ endif() add_subdirectory(contrib) add_subdirectory(data) add_subdirectory(Datafile) +add_subdirectory(DefinitionEx) add_subdirectory(FoundationEx) # add_subdirectory(Magic2) # TODO: AFX/MFC will be useless, port it to e.g., Qt right away. add_subdirectory(NetEx) diff --git a/DefinitionEx/CMakeLists.txt b/DefinitionEx/CMakeLists.txt new file mode 100644 index 0000000..9dd6620 --- /dev/null +++ b/DefinitionEx/CMakeLists.txt @@ -0,0 +1,15 @@ +project(DefinitionEx) +add_library( + DefinitionEx STATIC + Parser.cpp + Term.cpp + Token.cpp + ) +target_include_directories( + DefinitionEx + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} + ) +target_link_libraries( + DefinitionEx + PUBLIC FoundationEx + ) diff --git a/DefinitionEx/Parser.cpp b/DefinitionEx/Parser.cpp new file mode 100644 index 0000000..09827cf --- /dev/null +++ b/DefinitionEx/Parser.cpp @@ -0,0 +1,307 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the generic Parser class +*/ + +#include "Reader.h" +#include "Token.h" +#include "Parser.h" +#include "Term.h" +#include "Utils.h" +#include +#include + +enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS }; + +static int dump_tokens = 0; + +// +-------------------------------------------------------------------+ + +Term* error(char* msg, const Token& token) +{ + static char buf[1024]; + sprintf_s(buf, " near '%s' in line %d.", (const char*) token.symbol(), token.line()); + + return error(msg, buf); +} + +// +-------------------------------------------------------------------+ + +Parser::Parser(Reader* r) +{ + reader = r ? r : new ConsoleReader; + lexer = new Scanner(reader); + + Token::addKey("true", KEY_TRUE); + Token::addKey("false", KEY_FALSE); + Token::addKey(":", KEY_DEF); + Token::addKey("-", KEY_MINUS); +} + +Parser::~Parser() +{ + delete lexer; + delete reader; + //Token::close(); +} + +Term* +Parser::ParseTerm() +{ + Term* t = ParseTermBase(); + if (t == 0) return t; + + Term* t2 = ParseTermRest(t); + + return t2; +} + +Term* +Parser::ParseTermRest(Term* base) +{ + Token t = lexer->Get(); + + switch (t.type()) { + default: + lexer->PutBack(); + return base; + + case Token::StringLiteral: { + // concatenate adjacent string literal tokens: + TermText* text = base->isText(); + if (text) { + TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2)); + delete base; + return ParseTermRest(base2); + } + else { + lexer->PutBack(); + } + } + break; + + case Token::Keyword: + switch (t.key()) { + case KEY_DEF: + if (base->isText()) + return new TermDef(base->isText(), ParseTerm()); + else + return error("(Parse) illegal lhs in def", t); + + default: + lexer->PutBack(); + return base; + } + break; + } + + return base; +} + +static int xtol(const char* p) +{ + int n = 0; + + while (*p) { + char digit = *p++; + n *= 16; + + if (digit >= '0' && digit <= '9') + n += digit - '0'; + + else if (digit >= 'a' && digit <= 'f') + n += digit - 'a' + 10; + + else if (digit >= 'A' && digit <= 'F') + n += digit - 'A' + 10; + } + + return n; +} + +Term* +Parser::ParseTermBase() +{ + Token t = lexer->Get(); + int n = 0; + double d = 0.0; + + switch (t.type()) { + case Token::IntLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + // handle hex notation: + if (nstr[1] == 'x') + n = xtol(nstr+2); + + else + n = atol(nstr); + + return new TermNumber(n); + } + + case Token::FloatLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + d = atof(nstr); + return new TermNumber(d); + } + + case Token::StringLiteral: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new TermText(t.symbol()(1, t.symbol().length()-2)); + + case Token::AlphaIdent: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new TermText(t.symbol()); + + case Token::Keyword: + if (dump_tokens) + Print("%s", t.symbol().data()); + + switch (t.key()) { + case KEY_FALSE: return new TermBool(0); + case KEY_TRUE: return new TermBool(1); + + case KEY_MINUS: { + Token next = lexer->Get(); + if (next.type() == Token::IntLiteral) { + if (dump_tokens) + Print("%s", next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + n = -1 * atol(nstr); + return new TermNumber(n); + } + else if (next.type() == Token::FloatLiteral) { + if (dump_tokens) + Print("%s", next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + d = -1.0 * atof(nstr); + return new TermNumber(d); + } + else { + lexer->PutBack(); + return error("(Parse) illegal token '-': number expected", next); + } + } + break; + + default: + lexer->PutBack(); + return 0; + } + + case Token::LParen: return ParseArray(); + + case Token::LBrace: return ParseStruct(); + + case Token::CharLiteral: + return error("(Parse) illegal token ", t); + + default: + lexer->PutBack(); + return 0; + } +} + +TermArray* +Parser::ParseArray() +{ + TermList* elems = ParseTermList(0); + Token end = lexer->Get(); + + if (end.type() != Token::RParen) + return (TermArray*) error("(Parse) ')' missing in array-decl", end); + + return new TermArray(elems); +} + +TermStruct* +Parser::ParseStruct() +{ + TermList* elems = ParseTermList(1); + Token end = lexer->Get(); + + if (end.type() != Token::RBrace) + return (TermStruct*) error("(Parse) '}' missing in struct", end); + + return new TermStruct(elems); +} + +TermList* +Parser::ParseTermList(int for_struct) +{ + TermList* tlist = new TermList; + + Term* term = ParseTerm(); + while (term) { + if (for_struct && !term->isDef()) { + return (TermList*) error("(Parse) non-definition term in struct"); + } + else if (!for_struct && term->isDef()) { + return (TermList*) error("(Parse) illegal definition in array"); + } + + tlist->append(term); + Token t = lexer->Get(); + + /*** OLD WAY: COMMA SEPARATORS REQUIRED *** + if (t.type() != Token::Comma) { + lexer->PutBack(); + term = 0; + } + else + term = ParseTerm(); + /*******************************************/ + + // NEW WAY: COMMA SEPARATORS OPTIONAL: + if (t.type() != Token::Comma) { + lexer->PutBack(); + } + + term = ParseTerm(); + } + + return tlist; +} + + + diff --git a/DefinitionEx/Parser.h b/DefinitionEx/Parser.h new file mode 100644 index 0000000..84fe268 --- /dev/null +++ b/DefinitionEx/Parser.h @@ -0,0 +1,45 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the generic Parser class +*/ + +#ifndef PARSER_H +#define PARSER_H + +#include "Text.h" +#include "Term.h" + +// +-------------------------------------------------------------------+ + +class Reader; +class Scanner; + +// +-------------------------------------------------------------------+ + +class Parser +{ +public: + Parser(Reader* r = 0); + ~Parser(); + + Term* ParseTerm(); + Term* ParseTermBase(); + Term* ParseTermRest(Term* base); + TermList* ParseTermList(int for_struct); + TermArray* ParseArray(); + TermStruct* ParseStruct(); + +private: + Reader* reader; + Scanner* lexer; +}; + +#endif diff --git a/DefinitionEx/Term.cpp b/DefinitionEx/Term.cpp new file mode 100644 index 0000000..acd2c74 --- /dev/null +++ b/DefinitionEx/Term.cpp @@ -0,0 +1,119 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the Term class +*/ + + +#include "Term.h" +#include "Utils.h" + +// +-------------------------------------------------------------------+ + +Term* +error(char* s1, char* s2) +{ + Print("ERROR: "); + if (s1) Print(s1); + if (s2) Print(s2); + Print("\n\n"); + return 0; +} + +// +-------------------------------------------------------------------+ + +void TermBool::print(int level) { if (level > 0) Print(val? "true" : "false"); else Print("..."); } +void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); } +void TermText::print(int level) { if (level > 0) Print("\"%s\"", val.data()); else Print("..."); } + +// +-------------------------------------------------------------------+ + +TermArray::TermArray(TermList* elist) +{ + elems = elist; +} + +TermArray::~TermArray() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermArray::print(int level) +{ + if (level > 1) { + Print("("); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print(") "); + } + else Print("(...) "); +} + +// +-------------------------------------------------------------------+ + +TermStruct::TermStruct(TermList* elist) +{ + elems = elist; +} + +TermStruct::~TermStruct() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermStruct::print(int level) +{ + if (level > 1) { + Print("{"); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print("} "); + } + else Print("{...} "); +} + +// +-------------------------------------------------------------------+ + +TermDef::~TermDef() +{ + delete mname; + delete mval; +} + +void +TermDef::print(int level) +{ + if (level >= 0) { + mname->print(level); + Print(": "); + mval->print(level-1); + } + else Print("..."); +} + +// +-------------------------------------------------------------------+ diff --git a/DefinitionEx/Term.h b/DefinitionEx/Term.h new file mode 100644 index 0000000..79e2fc3 --- /dev/null +++ b/DefinitionEx/Term.h @@ -0,0 +1,171 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the Abstract Syntax Tree classes +*/ + + +#ifndef TERM_H +#define TERM_H + +#include "Text.h" +#include "List.h" + +// +-------------------------------------------------------------------+ + +class Term; +class TermBool; +class TermNumber; +class TermText; +class TermArray; +class TermDef; +class TermStruct; + +// +-------------------------------------------------------------------+ + +class Term +{ +public: + static const char* TYPENAME() { return "Term"; } + + Term() { } + virtual ~Term() { } + + virtual int operator==(const Term& rhs) const { return 0; } + + virtual void print(int level=10) { } + + // conversion tests + virtual Term* touch() { return this; } + virtual TermBool* isBool() { return 0; } + virtual TermNumber* isNumber() { return 0; } + virtual TermText* isText() { return 0; } + virtual TermArray* isArray() { return 0; } + virtual TermDef* isDef() { return 0; } + virtual TermStruct* isStruct() { return 0; } +}; + +Term* error(char*, char* = 0); + +// +-------------------------------------------------------------------+ + +typedef List TermList; +typedef ListIter TermListIter; + +// +-------------------------------------------------------------------+ + +class TermBool : public Term +{ +public: + static const char* TYPENAME() { return "TermBool"; } + + TermBool(bool v) : val(v) { } + + virtual void print(int level=10); + virtual TermBool* isBool() { return this; } + bool value() const { return val; } + +private: + bool val; +}; + +// +-------------------------------------------------------------------+ + +class TermNumber : public Term +{ +public: + static const char* TYPENAME() { return "TermNumber"; } + + TermNumber(double v) : val(v) { } + + virtual void print(int level=10); + virtual TermNumber* isNumber() { return this; } + double value() const { return val; } + +private: + double val; +}; + +// +-------------------------------------------------------------------+ + +class TermText : public Term +{ +public: + static const char* TYPENAME() { return "TermText"; } + + TermText(const Text& v) : val(v) { } + + virtual void print(int level=10); + virtual TermText* isText() { return this; } + Text value() const { return val; } + +private: + Text val; +}; + +// +-------------------------------------------------------------------+ + +class TermArray : public Term +{ +public: + static const char* TYPENAME() { return "TermArray"; } + + TermArray(TermList* elist); + virtual ~TermArray(); + + virtual void print(int level=10); + virtual TermArray* isArray() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermStruct : public Term +{ +public: + static const char* TYPENAME() { return "TermStruct"; } + + TermStruct(TermList* elist); + virtual ~TermStruct(); + + virtual void print(int level=10); + + virtual TermStruct* isStruct() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermDef : public Term +{ +public: + static const char* TYPENAME() { return "TermDef"; } + + TermDef(TermText* n, Term* v) : mname(n), mval(v) { } + virtual ~TermDef(); + + virtual void print(int level=10); + virtual TermDef* isDef() { return this; } + + virtual TermText* name() { return mname; } + virtual Term* term() { return mval; } + +private: + TermText* mname; + Term* mval; +}; + +#endif diff --git a/DefinitionEx/Token.cpp b/DefinitionEx/Token.cpp new file mode 100644 index 0000000..2cc97b5 --- /dev/null +++ b/DefinitionEx/Token.cpp @@ -0,0 +1,544 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class implementation +*/ + +#include "Token.h" +#include "Reader.h" +#include "Text.h" + +#include + +// +-------------------------------------------------------------------+ + +bool Token::hidecom = true; +char Token::combeg[3] = "//"; +char Token::comend[3] = "\n"; +char Token::altbeg[3] = "/*"; +char Token::altend[3] = "*/"; +Dictionary Token::keymap; + +// +-------------------------------------------------------------------+ + +Token::Token() + : mType(Undefined), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const Token& rhs) + : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn) +{ + mLength = rhs.mLength; + if (mLength < 8) { + strcpy_s(mSymbol, rhs.mSymbol); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } +} + +Token::Token(int t) + : mType(t), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const char* s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = strlen(s); + if (mLength < 8) { + strcpy_s(mSymbol, s); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, s); + } +} + +Token::Token(const Text& s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = s.length(); + if (mLength < 8) { + strcpy_s(mSymbol, s.data()); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, s.data()); + } +} + +Token::~Token() +{ + if (mLength >= 8) + delete [] mFullSymbol; +} + +// +-------------------------------------------------------------------+ + +void +Token::close() +{ + keymap.clear(); +} + +// +-------------------------------------------------------------------+ + +Token& +Token::operator = (const Token& rhs) +{ + if (mLength >= 8) + delete [] mFullSymbol; + + mLength = rhs.mLength; + if (mLength < 8) { + strcpy_s(mSymbol, rhs.mSymbol); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } + + mType = rhs.mType; + mKey = rhs.mKey; + mLine = rhs.mLine; + mColumn = rhs.mColumn; + + return *this; +} + +// +-------------------------------------------------------------------+ + +bool +Token::match(const Token& ref) const +{ + if (mType == ref.mType) { // if types match + if (ref.mLength == 0) // if no symbol to match + return true; // match! + + else if (mLength == ref.mLength) { // else if symbols match + if (mLength < 8) { + if (!strcmp(mSymbol, ref.mSymbol)) + return true; // match! + } + else { + if (!strcmp(mFullSymbol, ref.mFullSymbol)) + return true; // match! + } + } + } + + return false; +} + +// +-------------------------------------------------------------------+ + +Text +Token::symbol() const +{ + if (mLength < 8) + return Text(mSymbol); + else + return Text(mFullSymbol); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKey(const Text& k, int v) +{ + keymap.insert(k, v); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKeys(Dictionary& keys) +{ + DictionaryIter iter = keys; + while (++iter) + keymap.insert(iter.key(), iter.value()); +} + +// +-------------------------------------------------------------------+ + +bool +Token::findKey(const Text& k, int& v) +{ + if (keymap.contains(k)) { + v = keymap.find(k, 0); + return true; + } + else + return false; +} + +// +-------------------------------------------------------------------+ + +void +Token::comments(const Text& begin, const Text& end) +{ + combeg[0] = begin(0); + if (begin.length() > 1) combeg[1] = begin(1); + else combeg[1] = '\0'; + + comend[0] = end(0); + if (end.length() > 1) comend[1] = end(1); + else comend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +void +Token::altComments(const Text& begin, const Text& end) +{ + altbeg[0] = begin(0); + if (begin.length() > 1) altbeg[1] = begin(1); + else altbeg[1] = '\0'; + + altend[0] = end(0); + if (end.length() > 1) altend[1] = end(1); + else altend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +Text +Token::typestr() const +{ + Text t = "Unknown"; + switch (type()) { + case Undefined: t = "Undefined"; break; + case Keyword: t = "Keyword"; break; + case AlphaIdent: t = "AlphaIdent"; break; + case SymbolicIdent: t = "SymbolicIdent"; break; + case Comment: t = "Comment"; break; + case IntLiteral: t = "IntLiteral"; break; + case FloatLiteral: t = "FloatLiteral"; break; + case StringLiteral: t = "StringLiteral"; break; + case CharLiteral: t = "CharLiteral"; break; + case Dot: t = "Dot"; break; + case Comma: t = "Comma"; break; + case Colon: t = "Colon"; break; + case Semicolon: t = "Semicolon"; break; + case LParen: t = "LParen"; break; + case RParen: t = "RParen"; break; + case LBracket: t = "LBracket"; break; + case RBracket: t = "RBracket"; break; + case LBrace: t = "LBrace"; break; + case RBrace: t = "RBrace"; break; + case EOT: t = "EOT"; break; + case LastTokenType: t = "LastTokenType"; break; + } + + return t; +} + +// +-------------------------------------------------------------------+ + +Text +Token::describe(const Text& tok) +{ + Text d; + + switch (tok(0)) { + case '.' : d = "Token::Dot"; break; + case ',' : d = "Token::Comma"; break; + case ';' : d = "Token::Semicolon"; break; + case '(' : d = "Token::LParen"; break; + case ')' : d = "Token::RParen"; break; + case '[' : d = "Token::LBracket"; break; + case ']' : d = "Token::RBracket"; break; + case '{' : d = "Token::LBrace"; break; + case '}' : d = "Token::RBrace"; break; + default : break; + } + + if (d.length() == 0) { + if (isalpha(tok(0))) + d = "\"" + tok + "\", Token::AlphaIdent"; + else if (isdigit(tok(0))) { + if (tok.contains(".")) + d = "\"" + tok + "\", Token::FloatLiteral"; + else + d = "\"" + tok + "\", Token::IntLiteral"; + } + else + d = "\"" + tok + "\", Token::SymbolicIdent"; + } + + return d; +} + +// +-------------------------------------------------------------------+ + +Scanner::Scanner(Reader* r) + : reader(r), str(0), index(0), old_index(0), + length(0), line(0), old_line(0), lineStart(0) +{ } + +Scanner::Scanner(const Scanner& rhs) + : index(rhs.index), old_index(rhs.old_index), length(rhs.length), + reader(rhs.reader), + line(rhs.line), old_line(0), lineStart(rhs.lineStart) +{ + str = new char [strlen(rhs.str) + 1]; + strcpy(str, rhs.str); +} + +Scanner::Scanner(const Text& s) + : reader(0), index(0), old_index(0), length(s.length()), line(0), + old_line(0), lineStart(0) +{ + str = new char [s.length() + 1]; + strcpy(str, s.data()); +} + +Scanner::~Scanner() +{ + delete [] str; +} + +// +-------------------------------------------------------------------+ + +Scanner& +Scanner::operator = (const Scanner& rhs) +{ + delete [] str; + str = new char [strlen(rhs.str) + 1]; + strcpy(str, rhs.str); + + index = rhs.index; + old_index = rhs.old_index; + length = rhs.length; + line = rhs.line; + old_line = rhs.old_line; + lineStart = rhs.lineStart; + + return *this; +} + +// +-------------------------------------------------------------------+ + +void +Scanner::Load(const Text& s) +{ + delete [] str; + str = new char [s.length() + 1]; + strcpy(str, s.data()); + + index = 0; + old_index = 0; + best = Token(); + length = s.length(); + line = 0; + old_line = 0; + lineStart = 0; +} + +// +-------------------------------------------------------------------+ + +Token +Scanner::Get(Need need) +{ + int type = Token::EOT; + old_index = index; + old_line = line; + + eos = str + length; + p = str + index; + + if (p >= eos) { + if (need == Demand && reader) { + Load(reader->more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + while (isspace(*p) && p < eos) { // skip initial white space + if (*p == '\n') { + line++; + lineStart = p - str; + } + p++; + } + + if (p >= eos) { + if (need == Demand && reader) { + Load(reader->more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + Token result; + size_t start = p - str; + + if (*p == '"' || *p == '\'') { // special case for quoted tokens + + if (*p == '"') type = Token::StringLiteral; + else type = Token::CharLiteral; + + char match = *p; + while (++p < eos) { + if (*p == match) { // find matching quote + if (*(p-1) != '\\') { // if not escaped + p++; // token includes matching quote + break; + } + } + } + } + + // generic delimited comments + else if (*p == Token::comBeg(0) && + (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::comEnd(0) && + (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) { + p++; if (Token::comEnd(1)) p++; + break; + } + } + } + + // alternate form delimited comments + else if (*p == Token::altBeg(0) && + (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::altEnd(0) && + (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) { + p++; if (Token::altEnd(1)) p++; + break; + } + } + } + + else if (*p == '.') type = Token::Dot; + else if (*p == ',') type = Token::Comma; + else if (*p == ';') type = Token::Semicolon; + else if (*p == '(') type = Token::LParen; + else if (*p == ')') type = Token::RParen; + else if (*p == '[') type = Token::LBracket; + else if (*p == ']') type = Token::RBracket; + else if (*p == '{') type = Token::LBrace; + else if (*p == '}') type = Token::RBrace; + + // use lexical sub-parser for ints and floats + else if (isdigit(*p)) + type = GetNumeric(); + + else if (IsSymbolic(*p)) { + type = Token::SymbolicIdent; + while (IsSymbolic(*p)) p++; + } + + else { + type = Token::AlphaIdent; + while (IsAlpha(*p)) p++; + } + + size_t extent = (p - str) - start; + + if (extent < 1) extent = 1; // always get at least one character + + index = start + extent; // advance the cursor + int col = start - lineStart; + if (line == 0) col++; + + char* buf = new char [extent + 1]; + strncpy(buf, str + start, extent); + buf[extent] = '\0'; + + if (type == Token::Comment && Token::hidecom) { + delete [] buf; + if (Token::comEnd(0) == '\n') { + line++; + lineStart = p - str; + } + return Get(need); + } + + if (type == Token::AlphaIdent || // check for keyword + type == Token::SymbolicIdent) { + int val; + if (Token::findKey(Text(buf), val)) + result = Token(buf, Token::Keyword, val, line+1, col); + } + + if (result.mType != Token::Keyword) + result = Token(buf, type, 0, line+1, col); + + if (line+1 > (size_t) best.mLine || + (line+1 == (size_t) best.mLine && col > best.mColumn)) + best = result; + + delete [] buf; + return result; +} + +// +-------------------------------------------------------------------+ + +int +Scanner::GetNumeric() +{ + int type = Token::IntLiteral; // assume int + + if (*p == '0' && *(p+1) == 'x') { // check for hex: + p += 2; + while (isxdigit(*p)) p++; + return type; + } + + while (isdigit(*p) || *p == '_') p++; // whole number part + + if (*p == '.') { p++; // optional fract part + type = Token::FloatLiteral; // implies float + + while (isdigit(*p) || *p == '_') p++; // fractional part + } + + if (*p == 'E' || *p == 'e') { p++; // optional exponent + if (*p == '+' || *p == '-') p++; // which may be signed + while (isdigit(*p)) p++; + + type = Token::FloatLiteral; // implies float + } + + return type; +} + +// +-------------------------------------------------------------------+ + +bool +Scanner::IsAlpha(char c) +{ + return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false; +} + +// +-------------------------------------------------------------------+ + +bool +Scanner::IsSymbolic(char c) +{ + const char* s = "+-*/\\<=>~!@#$%^&|:"; + return strchr(s, c)?true:false; +} diff --git a/DefinitionEx/Token.h b/DefinitionEx/Token.h new file mode 100644 index 0000000..bd3723b --- /dev/null +++ b/DefinitionEx/Token.h @@ -0,0 +1,145 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class definition +*/ + +#ifndef Token_h +#define Token_h + +#include "Text.h" +#include "Dictionary.h" + +#pragma warning( disable : 4237) + +// +-------------------------------------------------------------------+ + +class Reader; +class Token; +class Scanner; + +// +-------------------------------------------------------------------+ + +class Token +{ + friend class Scanner; + +public: + // keywords must be alphanumeric identifiers or symbolic identifiers + enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment, + IntLiteral, FloatLiteral, StringLiteral, CharLiteral, + Dot, Comma, Colon, Semicolon, + LParen, RParen, LBracket, RBracket, LBrace, RBrace, + EOT, LastTokenType }; + + enum Alias { CompoundSeparator = Dot, + ItemSeparator = Comma, + StatementTerminator = Semicolon, + TypeIndicator = Colon, + Lambda = LastTokenType + 1 }; + + Token(); + Token(const Token& rhs); + Token(int t); + Token(const char* s, int t, int k=0, int l=0, int c=0); + Token(const Text& s, int t, int k=0, int l=0, int c=0); + ~Token(); + + Token& operator = (const Token& rhs); + + bool match(const Token& ref) const; + + Text symbol() const; + int type() const { return mType; } + int key() const { return mKey; } + int line() const { return mLine; } + int column() const { return mColumn; } + + Text typestr() const; + + static Text describe(const Text& tok); + static void addKey(const Text& k, int v); + static void addKeys(Dictionary& keys); + static bool findKey(const Text& k, int& v); + static void comments(const Text& begin, const Text& end); + static void altComments(const Text& begin, const Text& end); + static void hideComments(bool hide = true) { hidecom = hide; } + + static char comBeg(unsigned int i) { return combeg[i]; } + static char comEnd(unsigned int i) { return comend[i]; } + static char altBeg(unsigned int i) { return altbeg[i]; } + static char altEnd(unsigned int i) { return altend[i]; } + + static void close(); + +protected: + int mLength; + union { + char mSymbol[8]; + char* mFullSymbol; + }; + int mType; + int mKey; + int mLine; + int mColumn; + + static bool hidecom; + static char combeg[3]; + static char comend[3]; + static char altbeg[3]; + static char altend[3]; + + static Dictionary keymap; +}; + +// +-------------------------------------------------------------------+ + +class Scanner +{ +public: + Scanner(Reader* r = 0); + Scanner(const Text& s); + Scanner(const Scanner& rhs); + virtual ~Scanner(); + + Scanner& operator = (const Scanner& rhs); + + void Load(const Text& s); + + enum Need { Demand, Request }; + virtual Token Get(Need n = Demand); + + void PutBack() { index = old_index; line = old_line; } + int GetCursor() { return index; } + int GetLine() { return line; } + void Reset(int c, int l) { index = old_index = c; line = old_line = l; } + Token Best() const { return best; } + +protected: + virtual int GetNumeric(); + virtual bool IsSymbolic(char c); + virtual bool IsAlpha(char c); + + Reader* reader; + char* str; + + const char* p; + const char* eos; + + size_t index; + size_t old_index; + Token best; + size_t length; + size_t line; + size_t old_line; + size_t lineStart; +}; + +#endif // TOKEN_H diff --git a/StarsEx/CMakeLists.txt b/StarsEx/CMakeLists.txt index d734420..f637326 100644 --- a/StarsEx/CMakeLists.txt +++ b/StarsEx/CMakeLists.txt @@ -187,7 +187,6 @@ add_library( PCX.CPP Panic.cpp ParseUtil.cpp - Parser.cpp Particles.cpp Physical.cpp PlanScreen.cpp @@ -248,7 +247,6 @@ add_library( TacRefDlg.cpp TacticalAI.cpp TacticalView.cpp - Term.cpp Terrain.cpp TerrainApron.cpp TerrainClouds.cpp @@ -258,7 +256,6 @@ add_library( TexCubeDX9.cpp TexDX9.cpp Thruster.cpp - Token.cpp TrackIR.cpp Trail.cpp VidDlg.cpp @@ -284,6 +281,7 @@ target_include_directories( ) target_link_libraries( StarsEx + PUBLIC DefinitionEx PUBLIC FoundationEx PUBLIC NetEx PUBLIC Zlib::zlib diff --git a/StarsEx/Parser.cpp b/StarsEx/Parser.cpp deleted file mode 100644 index 09827cf..0000000 --- a/StarsEx/Parser.cpp +++ /dev/null @@ -1,307 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Implementation of the generic Parser class -*/ - -#include "Reader.h" -#include "Token.h" -#include "Parser.h" -#include "Term.h" -#include "Utils.h" -#include -#include - -enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS }; - -static int dump_tokens = 0; - -// +-------------------------------------------------------------------+ - -Term* error(char* msg, const Token& token) -{ - static char buf[1024]; - sprintf_s(buf, " near '%s' in line %d.", (const char*) token.symbol(), token.line()); - - return error(msg, buf); -} - -// +-------------------------------------------------------------------+ - -Parser::Parser(Reader* r) -{ - reader = r ? r : new ConsoleReader; - lexer = new Scanner(reader); - - Token::addKey("true", KEY_TRUE); - Token::addKey("false", KEY_FALSE); - Token::addKey(":", KEY_DEF); - Token::addKey("-", KEY_MINUS); -} - -Parser::~Parser() -{ - delete lexer; - delete reader; - //Token::close(); -} - -Term* -Parser::ParseTerm() -{ - Term* t = ParseTermBase(); - if (t == 0) return t; - - Term* t2 = ParseTermRest(t); - - return t2; -} - -Term* -Parser::ParseTermRest(Term* base) -{ - Token t = lexer->Get(); - - switch (t.type()) { - default: - lexer->PutBack(); - return base; - - case Token::StringLiteral: { - // concatenate adjacent string literal tokens: - TermText* text = base->isText(); - if (text) { - TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2)); - delete base; - return ParseTermRest(base2); - } - else { - lexer->PutBack(); - } - } - break; - - case Token::Keyword: - switch (t.key()) { - case KEY_DEF: - if (base->isText()) - return new TermDef(base->isText(), ParseTerm()); - else - return error("(Parse) illegal lhs in def", t); - - default: - lexer->PutBack(); - return base; - } - break; - } - - return base; -} - -static int xtol(const char* p) -{ - int n = 0; - - while (*p) { - char digit = *p++; - n *= 16; - - if (digit >= '0' && digit <= '9') - n += digit - '0'; - - else if (digit >= 'a' && digit <= 'f') - n += digit - 'a' + 10; - - else if (digit >= 'A' && digit <= 'F') - n += digit - 'A' + 10; - } - - return n; -} - -Term* -Parser::ParseTermBase() -{ - Token t = lexer->Get(); - int n = 0; - double d = 0.0; - - switch (t.type()) { - case Token::IntLiteral: { - if (dump_tokens) - Print("%s", t.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) t.symbol().length(); i++) - if (t.symbol()[i] != '_') - *p++ = t.symbol()[i]; - *p++ = '\0'; - - // handle hex notation: - if (nstr[1] == 'x') - n = xtol(nstr+2); - - else - n = atol(nstr); - - return new TermNumber(n); - } - - case Token::FloatLiteral: { - if (dump_tokens) - Print("%s", t.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) t.symbol().length(); i++) - if (t.symbol()[i] != '_') - *p++ = t.symbol()[i]; - *p++ = '\0'; - - d = atof(nstr); - return new TermNumber(d); - } - - case Token::StringLiteral: - if (dump_tokens) - Print("%s", t.symbol().data()); - - return new TermText(t.symbol()(1, t.symbol().length()-2)); - - case Token::AlphaIdent: - if (dump_tokens) - Print("%s", t.symbol().data()); - - return new TermText(t.symbol()); - - case Token::Keyword: - if (dump_tokens) - Print("%s", t.symbol().data()); - - switch (t.key()) { - case KEY_FALSE: return new TermBool(0); - case KEY_TRUE: return new TermBool(1); - - case KEY_MINUS: { - Token next = lexer->Get(); - if (next.type() == Token::IntLiteral) { - if (dump_tokens) - Print("%s", next.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) next.symbol().length(); i++) - if (next.symbol()[i] != '_') - *p++ = next.symbol()[i]; - *p++ = '\0'; - - n = -1 * atol(nstr); - return new TermNumber(n); - } - else if (next.type() == Token::FloatLiteral) { - if (dump_tokens) - Print("%s", next.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) next.symbol().length(); i++) - if (next.symbol()[i] != '_') - *p++ = next.symbol()[i]; - *p++ = '\0'; - - d = -1.0 * atof(nstr); - return new TermNumber(d); - } - else { - lexer->PutBack(); - return error("(Parse) illegal token '-': number expected", next); - } - } - break; - - default: - lexer->PutBack(); - return 0; - } - - case Token::LParen: return ParseArray(); - - case Token::LBrace: return ParseStruct(); - - case Token::CharLiteral: - return error("(Parse) illegal token ", t); - - default: - lexer->PutBack(); - return 0; - } -} - -TermArray* -Parser::ParseArray() -{ - TermList* elems = ParseTermList(0); - Token end = lexer->Get(); - - if (end.type() != Token::RParen) - return (TermArray*) error("(Parse) ')' missing in array-decl", end); - - return new TermArray(elems); -} - -TermStruct* -Parser::ParseStruct() -{ - TermList* elems = ParseTermList(1); - Token end = lexer->Get(); - - if (end.type() != Token::RBrace) - return (TermStruct*) error("(Parse) '}' missing in struct", end); - - return new TermStruct(elems); -} - -TermList* -Parser::ParseTermList(int for_struct) -{ - TermList* tlist = new TermList; - - Term* term = ParseTerm(); - while (term) { - if (for_struct && !term->isDef()) { - return (TermList*) error("(Parse) non-definition term in struct"); - } - else if (!for_struct && term->isDef()) { - return (TermList*) error("(Parse) illegal definition in array"); - } - - tlist->append(term); - Token t = lexer->Get(); - - /*** OLD WAY: COMMA SEPARATORS REQUIRED *** - if (t.type() != Token::Comma) { - lexer->PutBack(); - term = 0; - } - else - term = ParseTerm(); - /*******************************************/ - - // NEW WAY: COMMA SEPARATORS OPTIONAL: - if (t.type() != Token::Comma) { - lexer->PutBack(); - } - - term = ParseTerm(); - } - - return tlist; -} - - - diff --git a/StarsEx/Parser.h b/StarsEx/Parser.h deleted file mode 100644 index 84fe268..0000000 --- a/StarsEx/Parser.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Declaration of the generic Parser class -*/ - -#ifndef PARSER_H -#define PARSER_H - -#include "Text.h" -#include "Term.h" - -// +-------------------------------------------------------------------+ - -class Reader; -class Scanner; - -// +-------------------------------------------------------------------+ - -class Parser -{ -public: - Parser(Reader* r = 0); - ~Parser(); - - Term* ParseTerm(); - Term* ParseTermBase(); - Term* ParseTermRest(Term* base); - TermList* ParseTermList(int for_struct); - TermArray* ParseArray(); - TermStruct* ParseStruct(); - -private: - Reader* reader; - Scanner* lexer; -}; - -#endif diff --git a/StarsEx/Term.cpp b/StarsEx/Term.cpp deleted file mode 100644 index acd2c74..0000000 --- a/StarsEx/Term.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Implementation of the Term class -*/ - - -#include "Term.h" -#include "Utils.h" - -// +-------------------------------------------------------------------+ - -Term* -error(char* s1, char* s2) -{ - Print("ERROR: "); - if (s1) Print(s1); - if (s2) Print(s2); - Print("\n\n"); - return 0; -} - -// +-------------------------------------------------------------------+ - -void TermBool::print(int level) { if (level > 0) Print(val? "true" : "false"); else Print("..."); } -void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); } -void TermText::print(int level) { if (level > 0) Print("\"%s\"", val.data()); else Print("..."); } - -// +-------------------------------------------------------------------+ - -TermArray::TermArray(TermList* elist) -{ - elems = elist; -} - -TermArray::~TermArray() -{ - if (elems) elems->destroy(); - delete elems; -} - -void -TermArray::print(int level) -{ - if (level > 1) { - Print("("); - - if (elems) { - for (int i = 0; i < elems->size(); i++) { - elems->at(i)->print(level-1); - if (i < elems->size() -1) - Print(", "); - } - } - - Print(") "); - } - else Print("(...) "); -} - -// +-------------------------------------------------------------------+ - -TermStruct::TermStruct(TermList* elist) -{ - elems = elist; -} - -TermStruct::~TermStruct() -{ - if (elems) elems->destroy(); - delete elems; -} - -void -TermStruct::print(int level) -{ - if (level > 1) { - Print("{"); - - if (elems) { - for (int i = 0; i < elems->size(); i++) { - elems->at(i)->print(level-1); - if (i < elems->size() -1) - Print(", "); - } - } - - Print("} "); - } - else Print("{...} "); -} - -// +-------------------------------------------------------------------+ - -TermDef::~TermDef() -{ - delete mname; - delete mval; -} - -void -TermDef::print(int level) -{ - if (level >= 0) { - mname->print(level); - Print(": "); - mval->print(level-1); - } - else Print("..."); -} - -// +-------------------------------------------------------------------+ diff --git a/StarsEx/Term.h b/StarsEx/Term.h deleted file mode 100644 index 79e2fc3..0000000 --- a/StarsEx/Term.h +++ /dev/null @@ -1,171 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Declaration of the Abstract Syntax Tree classes -*/ - - -#ifndef TERM_H -#define TERM_H - -#include "Text.h" -#include "List.h" - -// +-------------------------------------------------------------------+ - -class Term; -class TermBool; -class TermNumber; -class TermText; -class TermArray; -class TermDef; -class TermStruct; - -// +-------------------------------------------------------------------+ - -class Term -{ -public: - static const char* TYPENAME() { return "Term"; } - - Term() { } - virtual ~Term() { } - - virtual int operator==(const Term& rhs) const { return 0; } - - virtual void print(int level=10) { } - - // conversion tests - virtual Term* touch() { return this; } - virtual TermBool* isBool() { return 0; } - virtual TermNumber* isNumber() { return 0; } - virtual TermText* isText() { return 0; } - virtual TermArray* isArray() { return 0; } - virtual TermDef* isDef() { return 0; } - virtual TermStruct* isStruct() { return 0; } -}; - -Term* error(char*, char* = 0); - -// +-------------------------------------------------------------------+ - -typedef List TermList; -typedef ListIter TermListIter; - -// +-------------------------------------------------------------------+ - -class TermBool : public Term -{ -public: - static const char* TYPENAME() { return "TermBool"; } - - TermBool(bool v) : val(v) { } - - virtual void print(int level=10); - virtual TermBool* isBool() { return this; } - bool value() const { return val; } - -private: - bool val; -}; - -// +-------------------------------------------------------------------+ - -class TermNumber : public Term -{ -public: - static const char* TYPENAME() { return "TermNumber"; } - - TermNumber(double v) : val(v) { } - - virtual void print(int level=10); - virtual TermNumber* isNumber() { return this; } - double value() const { return val; } - -private: - double val; -}; - -// +-------------------------------------------------------------------+ - -class TermText : public Term -{ -public: - static const char* TYPENAME() { return "TermText"; } - - TermText(const Text& v) : val(v) { } - - virtual void print(int level=10); - virtual TermText* isText() { return this; } - Text value() const { return val; } - -private: - Text val; -}; - -// +-------------------------------------------------------------------+ - -class TermArray : public Term -{ -public: - static const char* TYPENAME() { return "TermArray"; } - - TermArray(TermList* elist); - virtual ~TermArray(); - - virtual void print(int level=10); - virtual TermArray* isArray() { return this; } - TermList* elements() { return elems; } - -private: - TermList* elems; -}; - -// +-------------------------------------------------------------------+ - -class TermStruct : public Term -{ -public: - static const char* TYPENAME() { return "TermStruct"; } - - TermStruct(TermList* elist); - virtual ~TermStruct(); - - virtual void print(int level=10); - - virtual TermStruct* isStruct() { return this; } - TermList* elements() { return elems; } - -private: - TermList* elems; -}; - -// +-------------------------------------------------------------------+ - -class TermDef : public Term -{ -public: - static const char* TYPENAME() { return "TermDef"; } - - TermDef(TermText* n, Term* v) : mname(n), mval(v) { } - virtual ~TermDef(); - - virtual void print(int level=10); - virtual TermDef* isDef() { return this; } - - virtual TermText* name() { return mname; } - virtual Term* term() { return mval; } - -private: - TermText* mname; - Term* mval; -}; - -#endif diff --git a/StarsEx/Token.cpp b/StarsEx/Token.cpp deleted file mode 100644 index 2cc97b5..0000000 --- a/StarsEx/Token.cpp +++ /dev/null @@ -1,544 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Scanner class implementation -*/ - -#include "Token.h" -#include "Reader.h" -#include "Text.h" - -#include - -// +-------------------------------------------------------------------+ - -bool Token::hidecom = true; -char Token::combeg[3] = "//"; -char Token::comend[3] = "\n"; -char Token::altbeg[3] = "/*"; -char Token::altend[3] = "*/"; -Dictionary Token::keymap; - -// +-------------------------------------------------------------------+ - -Token::Token() - : mType(Undefined), mKey(0), mLine(0), mColumn(0) -{ - mLength = 0; - mSymbol[0] = '\0'; -} - -Token::Token(const Token& rhs) - : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn) -{ - mLength = rhs.mLength; - if (mLength < 8) { - strcpy_s(mSymbol, rhs.mSymbol); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, rhs.mFullSymbol); - } -} - -Token::Token(int t) - : mType(t), mKey(0), mLine(0), mColumn(0) -{ - mLength = 0; - mSymbol[0] = '\0'; -} - -Token::Token(const char* s, int t, int k, int l, int c) - : mType(t), mKey(k), mLine(l), mColumn(c) -{ - mLength = strlen(s); - if (mLength < 8) { - strcpy_s(mSymbol, s); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, s); - } -} - -Token::Token(const Text& s, int t, int k, int l, int c) - : mType(t), mKey(k), mLine(l), mColumn(c) -{ - mLength = s.length(); - if (mLength < 8) { - strcpy_s(mSymbol, s.data()); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, s.data()); - } -} - -Token::~Token() -{ - if (mLength >= 8) - delete [] mFullSymbol; -} - -// +-------------------------------------------------------------------+ - -void -Token::close() -{ - keymap.clear(); -} - -// +-------------------------------------------------------------------+ - -Token& -Token::operator = (const Token& rhs) -{ - if (mLength >= 8) - delete [] mFullSymbol; - - mLength = rhs.mLength; - if (mLength < 8) { - strcpy_s(mSymbol, rhs.mSymbol); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, rhs.mFullSymbol); - } - - mType = rhs.mType; - mKey = rhs.mKey; - mLine = rhs.mLine; - mColumn = rhs.mColumn; - - return *this; -} - -// +-------------------------------------------------------------------+ - -bool -Token::match(const Token& ref) const -{ - if (mType == ref.mType) { // if types match - if (ref.mLength == 0) // if no symbol to match - return true; // match! - - else if (mLength == ref.mLength) { // else if symbols match - if (mLength < 8) { - if (!strcmp(mSymbol, ref.mSymbol)) - return true; // match! - } - else { - if (!strcmp(mFullSymbol, ref.mFullSymbol)) - return true; // match! - } - } - } - - return false; -} - -// +-------------------------------------------------------------------+ - -Text -Token::symbol() const -{ - if (mLength < 8) - return Text(mSymbol); - else - return Text(mFullSymbol); -} - -// +-------------------------------------------------------------------+ - -void -Token::addKey(const Text& k, int v) -{ - keymap.insert(k, v); -} - -// +-------------------------------------------------------------------+ - -void -Token::addKeys(Dictionary& keys) -{ - DictionaryIter iter = keys; - while (++iter) - keymap.insert(iter.key(), iter.value()); -} - -// +-------------------------------------------------------------------+ - -bool -Token::findKey(const Text& k, int& v) -{ - if (keymap.contains(k)) { - v = keymap.find(k, 0); - return true; - } - else - return false; -} - -// +-------------------------------------------------------------------+ - -void -Token::comments(const Text& begin, const Text& end) -{ - combeg[0] = begin(0); - if (begin.length() > 1) combeg[1] = begin(1); - else combeg[1] = '\0'; - - comend[0] = end(0); - if (end.length() > 1) comend[1] = end(1); - else comend[1] = '\0'; -} - -// +-------------------------------------------------------------------+ - -void -Token::altComments(const Text& begin, const Text& end) -{ - altbeg[0] = begin(0); - if (begin.length() > 1) altbeg[1] = begin(1); - else altbeg[1] = '\0'; - - altend[0] = end(0); - if (end.length() > 1) altend[1] = end(1); - else altend[1] = '\0'; -} - -// +-------------------------------------------------------------------+ - -Text -Token::typestr() const -{ - Text t = "Unknown"; - switch (type()) { - case Undefined: t = "Undefined"; break; - case Keyword: t = "Keyword"; break; - case AlphaIdent: t = "AlphaIdent"; break; - case SymbolicIdent: t = "SymbolicIdent"; break; - case Comment: t = "Comment"; break; - case IntLiteral: t = "IntLiteral"; break; - case FloatLiteral: t = "FloatLiteral"; break; - case StringLiteral: t = "StringLiteral"; break; - case CharLiteral: t = "CharLiteral"; break; - case Dot: t = "Dot"; break; - case Comma: t = "Comma"; break; - case Colon: t = "Colon"; break; - case Semicolon: t = "Semicolon"; break; - case LParen: t = "LParen"; break; - case RParen: t = "RParen"; break; - case LBracket: t = "LBracket"; break; - case RBracket: t = "RBracket"; break; - case LBrace: t = "LBrace"; break; - case RBrace: t = "RBrace"; break; - case EOT: t = "EOT"; break; - case LastTokenType: t = "LastTokenType"; break; - } - - return t; -} - -// +-------------------------------------------------------------------+ - -Text -Token::describe(const Text& tok) -{ - Text d; - - switch (tok(0)) { - case '.' : d = "Token::Dot"; break; - case ',' : d = "Token::Comma"; break; - case ';' : d = "Token::Semicolon"; break; - case '(' : d = "Token::LParen"; break; - case ')' : d = "Token::RParen"; break; - case '[' : d = "Token::LBracket"; break; - case ']' : d = "Token::RBracket"; break; - case '{' : d = "Token::LBrace"; break; - case '}' : d = "Token::RBrace"; break; - default : break; - } - - if (d.length() == 0) { - if (isalpha(tok(0))) - d = "\"" + tok + "\", Token::AlphaIdent"; - else if (isdigit(tok(0))) { - if (tok.contains(".")) - d = "\"" + tok + "\", Token::FloatLiteral"; - else - d = "\"" + tok + "\", Token::IntLiteral"; - } - else - d = "\"" + tok + "\", Token::SymbolicIdent"; - } - - return d; -} - -// +-------------------------------------------------------------------+ - -Scanner::Scanner(Reader* r) - : reader(r), str(0), index(0), old_index(0), - length(0), line(0), old_line(0), lineStart(0) -{ } - -Scanner::Scanner(const Scanner& rhs) - : index(rhs.index), old_index(rhs.old_index), length(rhs.length), - reader(rhs.reader), - line(rhs.line), old_line(0), lineStart(rhs.lineStart) -{ - str = new char [strlen(rhs.str) + 1]; - strcpy(str, rhs.str); -} - -Scanner::Scanner(const Text& s) - : reader(0), index(0), old_index(0), length(s.length()), line(0), - old_line(0), lineStart(0) -{ - str = new char [s.length() + 1]; - strcpy(str, s.data()); -} - -Scanner::~Scanner() -{ - delete [] str; -} - -// +-------------------------------------------------------------------+ - -Scanner& -Scanner::operator = (const Scanner& rhs) -{ - delete [] str; - str = new char [strlen(rhs.str) + 1]; - strcpy(str, rhs.str); - - index = rhs.index; - old_index = rhs.old_index; - length = rhs.length; - line = rhs.line; - old_line = rhs.old_line; - lineStart = rhs.lineStart; - - return *this; -} - -// +-------------------------------------------------------------------+ - -void -Scanner::Load(const Text& s) -{ - delete [] str; - str = new char [s.length() + 1]; - strcpy(str, s.data()); - - index = 0; - old_index = 0; - best = Token(); - length = s.length(); - line = 0; - old_line = 0; - lineStart = 0; -} - -// +-------------------------------------------------------------------+ - -Token -Scanner::Get(Need need) -{ - int type = Token::EOT; - old_index = index; - old_line = line; - - eos = str + length; - p = str + index; - - if (p >= eos) { - if (need == Demand && reader) { - Load(reader->more()); - if (length > 0) - return Get(need); - } - return Token("", type, 0, line, 0); - } - - while (isspace(*p) && p < eos) { // skip initial white space - if (*p == '\n') { - line++; - lineStart = p - str; - } - p++; - } - - if (p >= eos) { - if (need == Demand && reader) { - Load(reader->more()); - if (length > 0) - return Get(need); - } - return Token("", type, 0, line, 0); - } - - Token result; - size_t start = p - str; - - if (*p == '"' || *p == '\'') { // special case for quoted tokens - - if (*p == '"') type = Token::StringLiteral; - else type = Token::CharLiteral; - - char match = *p; - while (++p < eos) { - if (*p == match) { // find matching quote - if (*(p-1) != '\\') { // if not escaped - p++; // token includes matching quote - break; - } - } - } - } - - // generic delimited comments - else if (*p == Token::comBeg(0) && - (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) { - type = Token::Comment; - while (++p < eos) { - if (*p == Token::comEnd(0) && - (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) { - p++; if (Token::comEnd(1)) p++; - break; - } - } - } - - // alternate form delimited comments - else if (*p == Token::altBeg(0) && - (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) { - type = Token::Comment; - while (++p < eos) { - if (*p == Token::altEnd(0) && - (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) { - p++; if (Token::altEnd(1)) p++; - break; - } - } - } - - else if (*p == '.') type = Token::Dot; - else if (*p == ',') type = Token::Comma; - else if (*p == ';') type = Token::Semicolon; - else if (*p == '(') type = Token::LParen; - else if (*p == ')') type = Token::RParen; - else if (*p == '[') type = Token::LBracket; - else if (*p == ']') type = Token::RBracket; - else if (*p == '{') type = Token::LBrace; - else if (*p == '}') type = Token::RBrace; - - // use lexical sub-parser for ints and floats - else if (isdigit(*p)) - type = GetNumeric(); - - else if (IsSymbolic(*p)) { - type = Token::SymbolicIdent; - while (IsSymbolic(*p)) p++; - } - - else { - type = Token::AlphaIdent; - while (IsAlpha(*p)) p++; - } - - size_t extent = (p - str) - start; - - if (extent < 1) extent = 1; // always get at least one character - - index = start + extent; // advance the cursor - int col = start - lineStart; - if (line == 0) col++; - - char* buf = new char [extent + 1]; - strncpy(buf, str + start, extent); - buf[extent] = '\0'; - - if (type == Token::Comment && Token::hidecom) { - delete [] buf; - if (Token::comEnd(0) == '\n') { - line++; - lineStart = p - str; - } - return Get(need); - } - - if (type == Token::AlphaIdent || // check for keyword - type == Token::SymbolicIdent) { - int val; - if (Token::findKey(Text(buf), val)) - result = Token(buf, Token::Keyword, val, line+1, col); - } - - if (result.mType != Token::Keyword) - result = Token(buf, type, 0, line+1, col); - - if (line+1 > (size_t) best.mLine || - (line+1 == (size_t) best.mLine && col > best.mColumn)) - best = result; - - delete [] buf; - return result; -} - -// +-------------------------------------------------------------------+ - -int -Scanner::GetNumeric() -{ - int type = Token::IntLiteral; // assume int - - if (*p == '0' && *(p+1) == 'x') { // check for hex: - p += 2; - while (isxdigit(*p)) p++; - return type; - } - - while (isdigit(*p) || *p == '_') p++; // whole number part - - if (*p == '.') { p++; // optional fract part - type = Token::FloatLiteral; // implies float - - while (isdigit(*p) || *p == '_') p++; // fractional part - } - - if (*p == 'E' || *p == 'e') { p++; // optional exponent - if (*p == '+' || *p == '-') p++; // which may be signed - while (isdigit(*p)) p++; - - type = Token::FloatLiteral; // implies float - } - - return type; -} - -// +-------------------------------------------------------------------+ - -bool -Scanner::IsAlpha(char c) -{ - return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false; -} - -// +-------------------------------------------------------------------+ - -bool -Scanner::IsSymbolic(char c) -{ - const char* s = "+-*/\\<=>~!@#$%^&|:"; - return strchr(s, c)?true:false; -} diff --git a/StarsEx/Token.h b/StarsEx/Token.h deleted file mode 100644 index bd3723b..0000000 --- a/StarsEx/Token.h +++ /dev/null @@ -1,145 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Scanner class definition -*/ - -#ifndef Token_h -#define Token_h - -#include "Text.h" -#include "Dictionary.h" - -#pragma warning( disable : 4237) - -// +-------------------------------------------------------------------+ - -class Reader; -class Token; -class Scanner; - -// +-------------------------------------------------------------------+ - -class Token -{ - friend class Scanner; - -public: - // keywords must be alphanumeric identifiers or symbolic identifiers - enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment, - IntLiteral, FloatLiteral, StringLiteral, CharLiteral, - Dot, Comma, Colon, Semicolon, - LParen, RParen, LBracket, RBracket, LBrace, RBrace, - EOT, LastTokenType }; - - enum Alias { CompoundSeparator = Dot, - ItemSeparator = Comma, - StatementTerminator = Semicolon, - TypeIndicator = Colon, - Lambda = LastTokenType + 1 }; - - Token(); - Token(const Token& rhs); - Token(int t); - Token(const char* s, int t, int k=0, int l=0, int c=0); - Token(const Text& s, int t, int k=0, int l=0, int c=0); - ~Token(); - - Token& operator = (const Token& rhs); - - bool match(const Token& ref) const; - - Text symbol() const; - int type() const { return mType; } - int key() const { return mKey; } - int line() const { return mLine; } - int column() const { return mColumn; } - - Text typestr() const; - - static Text describe(const Text& tok); - static void addKey(const Text& k, int v); - static void addKeys(Dictionary& keys); - static bool findKey(const Text& k, int& v); - static void comments(const Text& begin, const Text& end); - static void altComments(const Text& begin, const Text& end); - static void hideComments(bool hide = true) { hidecom = hide; } - - static char comBeg(unsigned int i) { return combeg[i]; } - static char comEnd(unsigned int i) { return comend[i]; } - static char altBeg(unsigned int i) { return altbeg[i]; } - static char altEnd(unsigned int i) { return altend[i]; } - - static void close(); - -protected: - int mLength; - union { - char mSymbol[8]; - char* mFullSymbol; - }; - int mType; - int mKey; - int mLine; - int mColumn; - - static bool hidecom; - static char combeg[3]; - static char comend[3]; - static char altbeg[3]; - static char altend[3]; - - static Dictionary keymap; -}; - -// +-------------------------------------------------------------------+ - -class Scanner -{ -public: - Scanner(Reader* r = 0); - Scanner(const Text& s); - Scanner(const Scanner& rhs); - virtual ~Scanner(); - - Scanner& operator = (const Scanner& rhs); - - void Load(const Text& s); - - enum Need { Demand, Request }; - virtual Token Get(Need n = Demand); - - void PutBack() { index = old_index; line = old_line; } - int GetCursor() { return index; } - int GetLine() { return line; } - void Reset(int c, int l) { index = old_index = c; line = old_line = l; } - Token Best() const { return best; } - -protected: - virtual int GetNumeric(); - virtual bool IsSymbolic(char c); - virtual bool IsAlpha(char c); - - Reader* reader; - char* str; - - const char* p; - const char* eos; - - size_t index; - size_t old_index; - Token best; - size_t length; - size_t line; - size_t old_line; - size_t lineStart; -}; - -#endif // TOKEN_H -- cgit v1.1