From f5b8091ee91b8323b8e2b1044ba8be676f2bfaf4 Mon Sep 17 00:00:00 2001 From: Aki Date: Tue, 19 Mar 2024 22:56:52 +0100 Subject: Definition Parser now consumes from the new Reader Next steps are to move ParseUtils functionality to DefinitionEx, sort things into the namespaces in this library, and then prepare it to handle overloading like nlohmann_json does. On the other side, it's time for starshatter::data. --- DefinitionEx/CMakeLists.txt | 24 +- DefinitionEx/Parser_ss.cpp | 303 ------------------ DefinitionEx/Parser_ss.h | 46 --- DefinitionEx/Term.cpp | 119 ------- DefinitionEx/Term.h | 171 ---------- DefinitionEx/Token.cpp | 544 -------------------------------- DefinitionEx/Token.h | 146 --------- DefinitionEx/include/Parser_ss.h | 43 +++ DefinitionEx/include/Term.h | 167 ++++++++++ DefinitionEx/include/Token.h | 143 +++++++++ DefinitionEx/src/Parser_ss.cpp | 316 +++++++++++++++++++ DefinitionEx/src/Term.cpp | 119 +++++++ DefinitionEx/src/Token.cpp | 549 +++++++++++++++++++++++++++++++++ DefinitionEx/test/parse_utils_like.cpp | 106 +++++++ 14 files changed, 1455 insertions(+), 1341 deletions(-) delete mode 100644 DefinitionEx/Parser_ss.cpp delete mode 100644 DefinitionEx/Parser_ss.h delete mode 100644 DefinitionEx/Term.cpp delete mode 100644 DefinitionEx/Term.h delete mode 100644 DefinitionEx/Token.cpp delete mode 100644 DefinitionEx/Token.h create mode 100644 DefinitionEx/include/Parser_ss.h create mode 100644 DefinitionEx/include/Term.h create mode 100644 DefinitionEx/include/Token.h create mode 100644 DefinitionEx/src/Parser_ss.cpp create mode 100644 DefinitionEx/src/Term.cpp create mode 100644 DefinitionEx/src/Token.cpp create mode 100644 DefinitionEx/test/parse_utils_like.cpp (limited to 'DefinitionEx') diff --git a/DefinitionEx/CMakeLists.txt b/DefinitionEx/CMakeLists.txt index fe4e819..f098670 100644 --- a/DefinitionEx/CMakeLists.txt +++ b/DefinitionEx/CMakeLists.txt @@ -1,15 +1,15 @@ project(DefinitionEx) + add_library( - DefinitionEx STATIC - 
Parser_ss.cpp - Term.cpp - Token.cpp - ) -target_include_directories( - DefinitionEx - PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - ) -target_link_libraries( - DefinitionEx - PUBLIC FoundationEx + ${PROJECT_NAME} STATIC + src/Parser_ss.cpp + src/Term.cpp + src/Token.cpp ) +target_include_directories(${PROJECT_NAME} PUBLIC include) +target_link_libraries(${PROJECT_NAME} PUBLIC FoundationEx) + +add_executable(${PROJECT_NAME}_test test/parse_utils_like.cpp) +target_link_libraries(${PROJECT_NAME}_test PRIVATE ${PROJECT_NAME} GTest::gtest_main) +generate_emulator(${PROJECT_NAME}_test) +gtest_discover_tests(${PROJECT_NAME}_test DISCOVERY_TIMEOUT 60) diff --git a/DefinitionEx/Parser_ss.cpp b/DefinitionEx/Parser_ss.cpp deleted file mode 100644 index d02b7ee..0000000 --- a/DefinitionEx/Parser_ss.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. 
- - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Implementation of the generic Parser class -*/ - -#include "Reader.h" -#include "Token.h" -#include "Parser_ss.h" -#include "Term.h" -#include "Utils.h" -#include -#include - -enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS }; - -static int dump_tokens = 0; - -// +-------------------------------------------------------------------+ - -Term* error(char* msg, const Token& token) -{ - static char buf[1024]; - snprintf(buf, 1024, " near '%s' in line %d", (const char*) token.symbol(), token.line()); - return error(msg, buf); -} - -// +-------------------------------------------------------------------+ - -Parser::Parser(Reader* r) -{ - reader = r; - lexer = new Scanner(reader); - - Token::addKey("true", KEY_TRUE); - Token::addKey("false", KEY_FALSE); - Token::addKey(":", KEY_DEF); - Token::addKey("-", KEY_MINUS); -} - -Parser::~Parser() -{ - delete lexer; - delete reader; - //Token::close(); -} - -Term* -Parser::ParseTerm() -{ - Term* t = ParseTermBase(); - if (t == 0) return t; - - Term* t2 = ParseTermRest(t); - - return t2; -} - -Term* -Parser::ParseTermRest(Term* base) -{ - Token t = lexer->Get(); - - switch (t.type()) { - default: - lexer->PutBack(); - return base; - - case Token::StringLiteral: { - // concatenate adjacent string literal tokens: - TermText* text = base->isText(); - if (text) { - TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2)); - delete base; - return ParseTermRest(base2); - } - else { - lexer->PutBack(); - } - } - break; - - case Token::Keyword: - switch (t.key()) { - case KEY_DEF: - if (base->isText()) - return new TermDef(base->isText(), ParseTerm()); - else - return error("(Parse) illegal lhs in def", t); - - default: - lexer->PutBack(); - return base; - } - break; - } - - return base; -} - -static int xtol(const char* p) -{ - int n = 0; - - while (*p) { - char digit = *p++; - n *= 16; - - if (digit >= '0' && digit <= '9') - n += digit - '0'; - - else 
if (digit >= 'a' && digit <= 'f') - n += digit - 'a' + 10; - - else if (digit >= 'A' && digit <= 'F') - n += digit - 'A' + 10; - } - - return n; -} - -Term* -Parser::ParseTermBase() -{ - Token t = lexer->Get(); - int n = 0; - double d = 0.0; - - switch (t.type()) { - case Token::IntLiteral: { - if (dump_tokens) - Print("%s", t.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) t.symbol().length(); i++) - if (t.symbol()[i] != '_') - *p++ = t.symbol()[i]; - *p++ = '\0'; - - // handle hex notation: - if (nstr[1] == 'x') - n = xtol(nstr+2); - - else - n = atol(nstr); - - return new TermNumber(n); - } - - case Token::FloatLiteral: { - if (dump_tokens) - Print("%s", t.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) t.symbol().length(); i++) - if (t.symbol()[i] != '_') - *p++ = t.symbol()[i]; - *p++ = '\0'; - - d = atof(nstr); - return new TermNumber(d); - } - - case Token::StringLiteral: - if (dump_tokens) - Print("%s", t.symbol().data()); - - return new TermText(t.symbol()(1, t.symbol().length()-2)); - - case Token::AlphaIdent: - if (dump_tokens) - Print("%s", t.symbol().data()); - - return new TermText(t.symbol()); - - case Token::Keyword: - if (dump_tokens) - Print("%s", t.symbol().data()); - - switch (t.key()) { - case KEY_FALSE: return new TermBool(0); - case KEY_TRUE: return new TermBool(1); - - case KEY_MINUS: { - Token next = lexer->Get(); - if (next.type() == Token::IntLiteral) { - if (dump_tokens) - Print("%s", next.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) next.symbol().length(); i++) - if (next.symbol()[i] != '_') - *p++ = next.symbol()[i]; - *p++ = '\0'; - - n = -1 * atol(nstr); - return new TermNumber(n); - } - else if (next.type() == Token::FloatLiteral) { - if (dump_tokens) - Print("%s", next.symbol().data()); - - char nstr[256], *p = nstr; - for (int i = 0; i < (int) next.symbol().length(); i++) - if (next.symbol()[i] != '_') - *p++ = next.symbol()[i]; - *p++ = 
'\0'; - - d = -1.0 * atof(nstr); - return new TermNumber(d); - } - else { - lexer->PutBack(); - return error("(Parse) illegal token '-': number expected", next); - } - } - break; - - default: - lexer->PutBack(); - return 0; - } - - case Token::LParen: return ParseArray(); - - case Token::LBrace: return ParseStruct(); - - case Token::CharLiteral: - return error("(Parse) illegal token ", t); - - default: - lexer->PutBack(); - return 0; - } -} - -TermArray* -Parser::ParseArray() -{ - TermList* elems = ParseTermList(0); - Token end = lexer->Get(); - - if (end.type() != Token::RParen) - return (TermArray*) error("(Parse) ')' missing in array-decl", end); - - return new TermArray(elems); -} - -TermStruct* -Parser::ParseStruct() -{ - TermList* elems = ParseTermList(1); - Token end = lexer->Get(); - - if (end.type() != Token::RBrace) - return (TermStruct*) error("(Parse) '}' missing in struct", end); - - return new TermStruct(elems); -} - -TermList* -Parser::ParseTermList(int for_struct) -{ - TermList* tlist = new TermList; - - Term* term = ParseTerm(); - while (term) { - if (for_struct && !term->isDef()) { - return (TermList*) error("(Parse) non-definition term in struct"); - } - else if (!for_struct && term->isDef()) { - return (TermList*) error("(Parse) illegal definition in array"); - } - - tlist->append(term); - Token t = lexer->Get(); - - /*** OLD WAY: COMMA SEPARATORS REQUIRED *** - if (t.type() != Token::Comma) { - lexer->PutBack(); - term = 0; - } - else - term = ParseTerm(); - /*******************************************/ - - // NEW WAY: COMMA SEPARATORS OPTIONAL: - if (t.type() != Token::Comma) { - lexer->PutBack(); - } - - term = ParseTerm(); - } - - return tlist; -} diff --git a/DefinitionEx/Parser_ss.h b/DefinitionEx/Parser_ss.h deleted file mode 100644 index 5019f61..0000000 --- a/DefinitionEx/Parser_ss.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - 
Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Declaration of the generic Parser class -*/ - -#ifndef Parser_h -#define Parser_h - -#include "Text.h" -#include "Term.h" - -// +-------------------------------------------------------------------+ - -class Reader; -class Scanner; - -// +-------------------------------------------------------------------+ - -class Parser -{ -public: - Parser(Reader* r = 0); - ~Parser(); - - Term* ParseTerm(); - Term* ParseTermBase(); - Term* ParseTermRest(Term* base); - TermList* ParseTermList(int for_struct); - TermArray* ParseArray(); - TermStruct* ParseStruct(); - -private: - Reader* reader; - Scanner* lexer; -}; - - -#endif // Parser_h diff --git a/DefinitionEx/Term.cpp b/DefinitionEx/Term.cpp deleted file mode 100644 index c27eb4e..0000000 --- a/DefinitionEx/Term.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Implementation of the Term class -*/ - - -#include "Term.h" -#include "Utils.h" - -// +-------------------------------------------------------------------+ - -Term* -error(char* s1, char* s2) -{ - Print("ERROR: "); - if (s1) Print(s1); - if (s2) Print(s2); - Print("\n\n"); - return 0; -} - -// +-------------------------------------------------------------------+ - -void TermBool::print(int level) { if (level > 0) Print(val? 
"true" : "false"); else Print("..."); } -void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); } -void TermText::print(int level) { if (level > 0) Print("\"%s\"", val.data()); else Print("..."); } - -// +-------------------------------------------------------------------+ - -TermArray::TermArray(TermList* elist) -{ - elems = elist; -} - -TermArray::~TermArray() -{ - if (elems) elems->destroy(); - delete elems; -} - -void -TermArray::print(int level) -{ - if (level > 1) { - Print("("); - - if (elems) { - for (int i = 0; i < elems->size(); i++) { - elems->at(i)->print(level-1); - if (i < elems->size() -1) - Print(", "); - } - } - - Print(") "); - } - else Print("(...) "); -} - -// +-------------------------------------------------------------------+ - -TermStruct::TermStruct(TermList* elist) -{ - elems = elist; -} - -TermStruct::~TermStruct() -{ - if (elems) elems->destroy(); - delete elems; -} - -void -TermStruct::print(int level) -{ - if (level > 1) { - Print("{"); - - if (elems) { - for (int i = 0; i < elems->size(); i++) { - elems->at(i)->print(level-1); - if (i < elems->size() -1) - Print(", "); - } - } - - Print("} "); - } - else Print("{...} "); -} - -// +-------------------------------------------------------------------+ - -TermDef::~TermDef() -{ - delete mname; - delete mval; -} - -void -TermDef::print(int level) -{ - if (level >= 0) { - mname->print(level); - Print(": "); - mval->print(level-1); - } - else Print("..."); -} - -// +-------------------------------------------------------------------+ diff --git a/DefinitionEx/Term.h b/DefinitionEx/Term.h deleted file mode 100644 index f7e2b86..0000000 --- a/DefinitionEx/Term.h +++ /dev/null @@ -1,171 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. 
- - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Declaration of the Abstract Syntax Tree classes -*/ - -#ifndef Term_h -#define Term_h - -#include "Text.h" -#include "List.h" - -// +-------------------------------------------------------------------+ - -class Term; -class TermBool; -class TermNumber; -class TermText; -class TermArray; -class TermDef; -class TermStruct; - -// +-------------------------------------------------------------------+ - -class Term -{ -public: - static const char* TYPENAME() { return "Term"; } - - Term() { } - virtual ~Term() { } - - virtual int operator==(const Term& rhs) const { return 0; } - - virtual void print(int level=10) { } - - // conversion tests - virtual Term* touch() { return this; } - virtual TermBool* isBool() { return 0; } - virtual TermNumber* isNumber() { return 0; } - virtual TermText* isText() { return 0; } - virtual TermArray* isArray() { return 0; } - virtual TermDef* isDef() { return 0; } - virtual TermStruct* isStruct() { return 0; } -}; - -Term* error(char*, char* = 0); - -// +-------------------------------------------------------------------+ - -typedef List TermList; -typedef ListIter TermListIter; - -// +-------------------------------------------------------------------+ - -class TermBool : public Term -{ -public: - static const char* TYPENAME() { return "TermBool"; } - - TermBool(bool v) : val(v) { } - - virtual void print(int level=10); - virtual TermBool* isBool() { return this; } - bool value() const { return val; } - -private: - bool val; -}; - -// +-------------------------------------------------------------------+ - -class TermNumber : public Term -{ -public: - static const char* TYPENAME() { return "TermNumber"; } - - TermNumber(double v) : val(v) { } - - virtual void print(int level=10); - virtual TermNumber* isNumber() { return this; } - double value() const { return val; } - -private: - double val; -}; - -// +-------------------------------------------------------------------+ - -class 
TermText : public Term -{ -public: - static const char* TYPENAME() { return "TermText"; } - - TermText(const Text& v) : val(v) { } - - virtual void print(int level=10); - virtual TermText* isText() { return this; } - Text value() const { return val; } - -private: - Text val; -}; - -// +-------------------------------------------------------------------+ - -class TermArray : public Term -{ -public: - static const char* TYPENAME() { return "TermArray"; } - - TermArray(TermList* elist); - virtual ~TermArray(); - - virtual void print(int level=10); - virtual TermArray* isArray() { return this; } - TermList* elements() { return elems; } - -private: - TermList* elems; -}; - -// +-------------------------------------------------------------------+ - -class TermStruct : public Term -{ -public: - static const char* TYPENAME() { return "TermStruct"; } - - TermStruct(TermList* elist); - virtual ~TermStruct(); - - virtual void print(int level=10); - - virtual TermStruct* isStruct() { return this; } - TermList* elements() { return elems; } - -private: - TermList* elems; -}; - -// +-------------------------------------------------------------------+ - -class TermDef : public Term -{ -public: - static const char* TYPENAME() { return "TermDef"; } - - TermDef(TermText* n, Term* v) : mname(n), mval(v) { } - virtual ~TermDef(); - - virtual void print(int level=10); - virtual TermDef* isDef() { return this; } - - virtual TermText* name() { return mname; } - virtual Term* term() { return mval; } - -private: - TermText* mname; - Term* mval; -}; - - -#endif // Term_h diff --git a/DefinitionEx/Token.cpp b/DefinitionEx/Token.cpp deleted file mode 100644 index 2d060b9..0000000 --- a/DefinitionEx/Token.cpp +++ /dev/null @@ -1,544 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. 
- - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Scanner class implementation -*/ - -#include "Token.h" -#include "Reader.h" -#include "Text.h" - -#include - -// +-------------------------------------------------------------------+ - -bool Token::hidecom = true; -char Token::combeg[3] = "//"; -char Token::comend[3] = "\n"; -char Token::altbeg[3] = "/*"; -char Token::altend[3] = "*/"; -Dictionary Token::keymap; - -// +-------------------------------------------------------------------+ - -Token::Token() - : mType(Undefined), mKey(0), mLine(0), mColumn(0) -{ - mLength = 0; - mSymbol[0] = '\0'; -} - -Token::Token(const Token& rhs) - : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn) -{ - mLength = rhs.mLength; - if (mLength < 8) { - strcpy(mSymbol, rhs.mSymbol); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, rhs.mFullSymbol); - } -} - -Token::Token(int t) - : mType(t), mKey(0), mLine(0), mColumn(0) -{ - mLength = 0; - mSymbol[0] = '\0'; -} - -Token::Token(const char* s, int t, int k, int l, int c) - : mType(t), mKey(k), mLine(l), mColumn(c) -{ - mLength = strlen(s); - if (mLength < 8) { - strcpy(mSymbol, s); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, s); - } -} - -Token::Token(const Text& s, int t, int k, int l, int c) - : mType(t), mKey(k), mLine(l), mColumn(c) -{ - mLength = s.length(); - if (mLength < 8) { - strcpy(mSymbol, s.data()); - } - else { - mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, s.data()); - } -} - -Token::~Token() -{ - if (mLength >= 8) - delete [] mFullSymbol; -} - -// +-------------------------------------------------------------------+ - -void -Token::close() -{ - keymap.clear(); -} - -// +-------------------------------------------------------------------+ - -Token& -Token::operator = (const Token& rhs) -{ - if (mLength >= 8) - delete [] mFullSymbol; - - mLength = rhs.mLength; - if (mLength < 8) { - strcpy(mSymbol, rhs.mSymbol); - } - else { - 
mFullSymbol = new char[mLength + 1]; - strcpy(mFullSymbol, rhs.mFullSymbol); - } - - mType = rhs.mType; - mKey = rhs.mKey; - mLine = rhs.mLine; - mColumn = rhs.mColumn; - - return *this; -} - -// +-------------------------------------------------------------------+ - -bool -Token::match(const Token& ref) const -{ - if (mType == ref.mType) { // if types match - if (ref.mLength == 0) // if no symbol to match - return true; // match! - - else if (mLength == ref.mLength) { // else if symbols match - if (mLength < 8) { - if (!strcmp(mSymbol, ref.mSymbol)) - return true; // match! - } - else { - if (!strcmp(mFullSymbol, ref.mFullSymbol)) - return true; // match! - } - } - } - - return false; -} - -// +-------------------------------------------------------------------+ - -Text -Token::symbol() const -{ - if (mLength < 8) - return Text(mSymbol); - else - return Text(mFullSymbol); -} - -// +-------------------------------------------------------------------+ - -void -Token::addKey(const Text& k, int v) -{ - keymap.insert(k, v); -} - -// +-------------------------------------------------------------------+ - -void -Token::addKeys(Dictionary& keys) -{ - DictionaryIter iter = keys; - while (++iter) - keymap.insert(iter.key(), iter.value()); -} - -// +-------------------------------------------------------------------+ - -bool -Token::findKey(const Text& k, int& v) -{ - if (keymap.contains(k)) { - v = keymap.find(k, 0); - return true; - } - else - return false; -} - -// +-------------------------------------------------------------------+ - -void -Token::comments(const Text& begin, const Text& end) -{ - combeg[0] = begin(0); - if (begin.length() > 1) combeg[1] = begin(1); - else combeg[1] = '\0'; - - comend[0] = end(0); - if (end.length() > 1) comend[1] = end(1); - else comend[1] = '\0'; -} - -// +-------------------------------------------------------------------+ - -void -Token::altComments(const Text& begin, const Text& end) -{ - altbeg[0] = begin(0); - if (begin.length() > 
1) altbeg[1] = begin(1); - else altbeg[1] = '\0'; - - altend[0] = end(0); - if (end.length() > 1) altend[1] = end(1); - else altend[1] = '\0'; -} - -// +-------------------------------------------------------------------+ - -Text -Token::typestr() const -{ - Text t = "Unknown"; - switch (type()) { - case Undefined: t = "Undefined"; break; - case Keyword: t = "Keyword"; break; - case AlphaIdent: t = "AlphaIdent"; break; - case SymbolicIdent: t = "SymbolicIdent"; break; - case Comment: t = "Comment"; break; - case IntLiteral: t = "IntLiteral"; break; - case FloatLiteral: t = "FloatLiteral"; break; - case StringLiteral: t = "StringLiteral"; break; - case CharLiteral: t = "CharLiteral"; break; - case Dot: t = "Dot"; break; - case Comma: t = "Comma"; break; - case Colon: t = "Colon"; break; - case Semicolon: t = "Semicolon"; break; - case LParen: t = "LParen"; break; - case RParen: t = "RParen"; break; - case LBracket: t = "LBracket"; break; - case RBracket: t = "RBracket"; break; - case LBrace: t = "LBrace"; break; - case RBrace: t = "RBrace"; break; - case EOT: t = "EOT"; break; - case LastTokenType: t = "LastTokenType"; break; - } - - return t; -} - -// +-------------------------------------------------------------------+ - -Text -Token::describe(const Text& tok) -{ - Text d; - - switch (tok(0)) { - case '.' 
: d = "Token::Dot"; break; - case ',' : d = "Token::Comma"; break; - case ';' : d = "Token::Semicolon"; break; - case '(' : d = "Token::LParen"; break; - case ')' : d = "Token::RParen"; break; - case '[' : d = "Token::LBracket"; break; - case ']' : d = "Token::RBracket"; break; - case '{' : d = "Token::LBrace"; break; - case '}' : d = "Token::RBrace"; break; - default : break; - } - - if (d.length() == 0) { - if (isalpha(tok(0))) - d = "\"" + tok + "\", Token::AlphaIdent"; - else if (isdigit(tok(0))) { - if (tok.contains(".")) - d = "\"" + tok + "\", Token::FloatLiteral"; - else - d = "\"" + tok + "\", Token::IntLiteral"; - } - else - d = "\"" + tok + "\", Token::SymbolicIdent"; - } - - return d; -} - -// +-------------------------------------------------------------------+ - -Scanner::Scanner(Reader* r) - : reader(r), str(0), index(0), old_index(0), - length(0), line(0), old_line(0), lineStart(0) -{ } - -Scanner::Scanner(const Scanner& rhs) - : index(rhs.index), old_index(rhs.old_index), length(rhs.length), - reader(rhs.reader), - line(rhs.line), old_line(0), lineStart(rhs.lineStart) -{ - str = new char [strlen(rhs.str) + 1]; - strcpy(str, rhs.str); -} - -Scanner::Scanner(const Text& s) - : reader(0), index(0), old_index(0), length(s.length()), line(0), - old_line(0), lineStart(0) -{ - str = new char [s.length() + 1]; - strcpy(str, s.data()); -} - -Scanner::~Scanner() -{ - delete [] str; -} - -// +-------------------------------------------------------------------+ - -Scanner& -Scanner::operator = (const Scanner& rhs) -{ - delete [] str; - str = new char [strlen(rhs.str) + 1]; - strcpy(str, rhs.str); - - index = rhs.index; - old_index = rhs.old_index; - length = rhs.length; - line = rhs.line; - old_line = rhs.old_line; - lineStart = rhs.lineStart; - - return *this; -} - -// +-------------------------------------------------------------------+ - -void -Scanner::Load(const Text& s) -{ - delete [] str; - str = new char [s.length() + 1]; - strcpy(str, s.data()); - - 
index = 0; - old_index = 0; - best = Token(); - length = s.length(); - line = 0; - old_line = 0; - lineStart = 0; -} - -// +-------------------------------------------------------------------+ - -Token -Scanner::Get(Need need) -{ - int type = Token::EOT; - old_index = index; - old_line = line; - - eos = str + length; - p = str + index; - - if (p >= eos) { - if (need == Demand && reader) { - Load(reader->more()); - if (length > 0) - return Get(need); - } - return Token("", type, 0, line, 0); - } - - while (isspace(*p) && p < eos) { // skip initial white space - if (*p == '\n') { - line++; - lineStart = p - str; - } - p++; - } - - if (p >= eos) { - if (need == Demand && reader) { - Load(reader->more()); - if (length > 0) - return Get(need); - } - return Token("", type, 0, line, 0); - } - - Token result; - size_t start = p - str; - - if (*p == '"' || *p == '\'') { // special case for quoted tokens - - if (*p == '"') type = Token::StringLiteral; - else type = Token::CharLiteral; - - char match = *p; - while (++p < eos) { - if (*p == match) { // find matching quote - if (*(p-1) != '\\') { // if not escaped - p++; // token includes matching quote - break; - } - } - } - } - - // generic delimited comments - else if (*p == Token::comBeg(0) && - (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) { - type = Token::Comment; - while (++p < eos) { - if (*p == Token::comEnd(0) && - (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) { - p++; if (Token::comEnd(1)) p++; - break; - } - } - } - - // alternate form delimited comments - else if (*p == Token::altBeg(0) && - (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) { - type = Token::Comment; - while (++p < eos) { - if (*p == Token::altEnd(0) && - (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) { - p++; if (Token::altEnd(1)) p++; - break; - } - } - } - - else if (*p == '.') type = Token::Dot; - else if (*p == ',') type = Token::Comma; - else if (*p == ';') type = Token::Semicolon; - else if (*p == '(') type = Token::LParen; - 
else if (*p == ')') type = Token::RParen; - else if (*p == '[') type = Token::LBracket; - else if (*p == ']') type = Token::RBracket; - else if (*p == '{') type = Token::LBrace; - else if (*p == '}') type = Token::RBrace; - - // use lexical sub-parser for ints and floats - else if (isdigit(*p)) - type = GetNumeric(); - - else if (IsSymbolic(*p)) { - type = Token::SymbolicIdent; - while (IsSymbolic(*p)) p++; - } - - else { - type = Token::AlphaIdent; - while (IsAlpha(*p)) p++; - } - - size_t extent = (p - str) - start; - - if (extent < 1) extent = 1; // always get at least one character - - index = start + extent; // advance the cursor - int col = start - lineStart; - if (line == 0) col++; - - char* buf = new char [extent + 1]; - strncpy(buf, str + start, extent); - buf[extent] = '\0'; - - if (type == Token::Comment && Token::hidecom) { - delete [] buf; - if (Token::comEnd(0) == '\n') { - line++; - lineStart = p - str; - } - return Get(need); - } - - if (type == Token::AlphaIdent || // check for keyword - type == Token::SymbolicIdent) { - int val; - if (Token::findKey(Text(buf), val)) - result = Token(buf, Token::Keyword, val, line+1, col); - } - - if (result.mType != Token::Keyword) - result = Token(buf, type, 0, line+1, col); - - if (line+1 > (size_t) best.mLine || - (line+1 == (size_t) best.mLine && col > best.mColumn)) - best = result; - - delete [] buf; - return result; -} - -// +-------------------------------------------------------------------+ - -int -Scanner::GetNumeric() -{ - int type = Token::IntLiteral; // assume int - - if (*p == '0' && *(p+1) == 'x') { // check for hex: - p += 2; - while (isxdigit(*p)) p++; - return type; - } - - while (isdigit(*p) || *p == '_') p++; // whole number part - - if (*p == '.') { p++; // optional fract part - type = Token::FloatLiteral; // implies float - - while (isdigit(*p) || *p == '_') p++; // fractional part - } - - if (*p == 'E' || *p == 'e') { p++; // optional exponent - if (*p == '+' || *p == '-') p++; // which may 
be signed - while (isdigit(*p)) p++; - - type = Token::FloatLiteral; // implies float - } - - return type; -} - -// +-------------------------------------------------------------------+ - -bool -Scanner::IsAlpha(char c) -{ - return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false; -} - -// +-------------------------------------------------------------------+ - -bool -Scanner::IsSymbolic(char c) -{ - const char* s = "+-*/\\<=>~!@#$%^&|:"; - return strchr(s, c)?true:false; -} diff --git a/DefinitionEx/Token.h b/DefinitionEx/Token.h deleted file mode 100644 index 59a3414..0000000 --- a/DefinitionEx/Token.h +++ /dev/null @@ -1,146 +0,0 @@ -/* Starshatter: The Open Source Project - Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors - Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors - Copyright (c) 1997-2006, Destroyer Studios LLC. - - AUTHOR: John DiCamillo - - - OVERVIEW - ======== - Scanner class definition -*/ - -#ifndef Token_h -#define Token_h - -#include "Text.h" -#include "Dictionary.h" - -#pragma warning( disable : 4237) - -// +-------------------------------------------------------------------+ - -class Reader; -class Token; -class Scanner; - -// +-------------------------------------------------------------------+ - -class Token -{ - friend class Scanner; - -public: - // keywords must be alphanumeric identifiers or symbolic identifiers - enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment, - IntLiteral, FloatLiteral, StringLiteral, CharLiteral, - Dot, Comma, Colon, Semicolon, - LParen, RParen, LBracket, RBracket, LBrace, RBrace, - EOT, LastTokenType }; - - enum Alias { CompoundSeparator = Dot, - ItemSeparator = Comma, - StatementTerminator = Semicolon, - TypeIndicator = Colon, - Lambda = LastTokenType + 1 }; - - Token(); - Token(const Token& rhs); - Token(int t); - Token(const char* s, int t, int k=0, int l=0, int c=0); - Token(const Text& s, int t, int k=0, int l=0, int c=0); - ~Token(); - 
- Token& operator = (const Token& rhs); - - bool match(const Token& ref) const; - - Text symbol() const; - int type() const { return mType; } - int key() const { return mKey; } - int line() const { return mLine; } - int column() const { return mColumn; } - - Text typestr() const; - - static Text describe(const Text& tok); - static void addKey(const Text& k, int v); - static void addKeys(Dictionary& keys); - static bool findKey(const Text& k, int& v); - static void comments(const Text& begin, const Text& end); - static void altComments(const Text& begin, const Text& end); - static void hideComments(bool hide = true) { hidecom = hide; } - - static char comBeg(unsigned int i) { return combeg[i]; } - static char comEnd(unsigned int i) { return comend[i]; } - static char altBeg(unsigned int i) { return altbeg[i]; } - static char altEnd(unsigned int i) { return altend[i]; } - - static void close(); - -protected: - int mLength; - union { - char mSymbol[8]; - char* mFullSymbol; - }; - int mType; - int mKey; - int mLine; - int mColumn; - - static bool hidecom; - static char combeg[3]; - static char comend[3]; - static char altbeg[3]; - static char altend[3]; - - static Dictionary keymap; -}; - -// +-------------------------------------------------------------------+ - -class Scanner -{ -public: - Scanner(Reader* r = 0); - Scanner(const Text& s); - Scanner(const Scanner& rhs); - virtual ~Scanner(); - - Scanner& operator = (const Scanner& rhs); - - void Load(const Text& s); - - enum Need { Demand, Request }; - virtual Token Get(Need n = Demand); - - void PutBack() { index = old_index; line = old_line; } - int GetCursor() { return index; } - int GetLine() { return line; } - void Reset(int c, int l) { index = old_index = c; line = old_line = l; } - Token Best() const { return best; } - -protected: - virtual int GetNumeric(); - virtual bool IsSymbolic(char c); - virtual bool IsAlpha(char c); - - Reader* reader; - char* str; - - const char* p; - const char* eos; - - size_t index; 
- size_t old_index; - Token best; - size_t length; - size_t line; - size_t old_line; - size_t lineStart; -}; - - -#endif // Token_h diff --git a/DefinitionEx/include/Parser_ss.h b/DefinitionEx/include/Parser_ss.h new file mode 100644 index 0000000..50659b9 --- /dev/null +++ b/DefinitionEx/include/Parser_ss.h @@ -0,0 +1,43 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the generic Parser class +*/ + +#pragma once + +#include +#include + +#include "Term.h" + +// +-------------------------------------------------------------------+ + +class Scanner; + +// +-------------------------------------------------------------------+ + +class Parser +{ +public: + Parser(); + Parser(starshatter::foundation::Reader r); + ~Parser(); + + Term* ParseTerm(); + Term* ParseTermBase(); + Term* ParseTermRest(Term* base); + TermList* ParseTermList(int for_struct); + TermArray* ParseArray(); + TermStruct* ParseStruct(); + +private: + Scanner* lexer; +}; diff --git a/DefinitionEx/include/Term.h b/DefinitionEx/include/Term.h new file mode 100644 index 0000000..1bc30b6 --- /dev/null +++ b/DefinitionEx/include/Term.h @@ -0,0 +1,167 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. 
+ + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Declaration of the Abstract Syntax Tree classes +*/ + +#pragma once + +#include +#include + +// +-------------------------------------------------------------------+ + +class Term; +class TermBool; +class TermNumber; +class TermText; +class TermArray; +class TermDef; +class TermStruct; + +// +-------------------------------------------------------------------+ + +class Term +{ +public: + static const char* TYPENAME() { return "Term"; } + + Term() { } + virtual ~Term() { } + + virtual int operator==(const Term& rhs) const { return 0; } + + virtual void print(int level=10) { } + + // conversion tests + virtual Term* touch() { return this; } + virtual TermBool* isBool() { return 0; } + virtual TermNumber* isNumber() { return 0; } + virtual TermText* isText() { return 0; } + virtual TermArray* isArray() { return 0; } + virtual TermDef* isDef() { return 0; } + virtual TermStruct* isStruct() { return 0; } +}; + +Term* error(const char*, const char* = 0); + +// +-------------------------------------------------------------------+ + +typedef List TermList; +typedef ListIter TermListIter; + +// +-------------------------------------------------------------------+ + +class TermBool : public Term +{ +public: + static const char* TYPENAME() { return "TermBool"; } + + TermBool(bool v) : val(v) { } + + virtual void print(int level=10); + virtual TermBool* isBool() { return this; } + bool value() const { return val; } + +private: + bool val; +}; + +// +-------------------------------------------------------------------+ + +class TermNumber : public Term +{ +public: + static const char* TYPENAME() { return "TermNumber"; } + + TermNumber(double v) : val(v) { } + + virtual void print(int level=10); + virtual TermNumber* isNumber() { return this; } + double value() const { return val; } + +private: + double val; +}; + +// +-------------------------------------------------------------------+ + +class TermText : public Term +{ 
+public: + static const char* TYPENAME() { return "TermText"; } + + TermText(const Text& v) : val(v) { } + + virtual void print(int level=10); + virtual TermText* isText() { return this; } + Text value() const { return val; } + +private: + Text val; +}; + +// +-------------------------------------------------------------------+ + +class TermArray : public Term +{ +public: + static const char* TYPENAME() { return "TermArray"; } + + TermArray(TermList* elist); + virtual ~TermArray(); + + virtual void print(int level=10); + virtual TermArray* isArray() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermStruct : public Term +{ +public: + static const char* TYPENAME() { return "TermStruct"; } + + TermStruct(TermList* elist); + virtual ~TermStruct(); + + virtual void print(int level=10); + + virtual TermStruct* isStruct() { return this; } + TermList* elements() { return elems; } + +private: + TermList* elems; +}; + +// +-------------------------------------------------------------------+ + +class TermDef : public Term +{ +public: + static const char* TYPENAME() { return "TermDef"; } + + TermDef(TermText* n, Term* v) : mname(n), mval(v) { } + virtual ~TermDef(); + + virtual void print(int level=10); + virtual TermDef* isDef() { return this; } + + virtual TermText* name() { return mname; } + virtual Term* term() { return mval; } + +private: + TermText* mname; + Term* mval; +}; diff --git a/DefinitionEx/include/Token.h b/DefinitionEx/include/Token.h new file mode 100644 index 0000000..3dd8eb9 --- /dev/null +++ b/DefinitionEx/include/Token.h @@ -0,0 +1,143 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. 
+ + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class definition +*/ + +#pragma once + +#include +#include +#include + +#pragma warning( disable : 4237) + +// +-------------------------------------------------------------------+ + +class Token; +class Scanner; + +// +-------------------------------------------------------------------+ + +class Token +{ + friend class Scanner; + +public: + // keywords must be alphanumeric identifiers or symbolic identifiers + enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment, + IntLiteral, FloatLiteral, StringLiteral, CharLiteral, + Dot, Comma, Colon, Semicolon, + LParen, RParen, LBracket, RBracket, LBrace, RBrace, + EOT, LastTokenType }; + + enum Alias { CompoundSeparator = Dot, + ItemSeparator = Comma, + StatementTerminator = Semicolon, + TypeIndicator = Colon, + Lambda = LastTokenType + 1 }; + + Token(); + Token(const Token& rhs); + Token(int t); + Token(const char* s, int t, int k=0, int l=0, int c=0); + Token(const Text& s, int t, int k=0, int l=0, int c=0); + ~Token(); + + Token& operator = (const Token& rhs); + + bool match(const Token& ref) const; + + Text symbol() const; + int type() const { return mType; } + int key() const { return mKey; } + int line() const { return mLine; } + int column() const { return mColumn; } + + Text typestr() const; + + static Text describe(const Text& tok); + static void addKey(const Text& k, int v); + static void addKeys(Dictionary& keys); + static bool findKey(const Text& k, int& v); + static void comments(const Text& begin, const Text& end); + static void altComments(const Text& begin, const Text& end); + static void hideComments(bool hide = true) { hidecom = hide; } + + static char comBeg(unsigned int i) { return combeg[i]; } + static char comEnd(unsigned int i) { return comend[i]; } + static char altBeg(unsigned int i) { return altbeg[i]; } + static char altEnd(unsigned int i) { return altend[i]; } + + static void close(); + +protected: + int mLength; + 
union { + char mSymbol[8]; + char* mFullSymbol; + }; + int mType; + int mKey; + int mLine; + int mColumn; + + static bool hidecom; + static char combeg[3]; + static char comend[3]; + static char altbeg[3]; + static char altend[3]; + + static Dictionary keymap; +}; + +// +-------------------------------------------------------------------+ + +class Scanner +{ +public: + Scanner(); + Scanner(starshatter::foundation::Reader r); + Scanner(const Text& s); + Scanner(const Scanner& rhs); + virtual ~Scanner(); + + Scanner& operator = (const Scanner& rhs); + + void Load(const Text& s); + + enum Need { Demand, Request }; + virtual Token Get(Need n = Demand); + + void PutBack() { index = old_index; line = old_line; } + int GetCursor() { return index; } + int GetLine() { return line; } + void Reset(int c, int l) { index = old_index = c; line = old_line = l; } + Token Best() const { return best; } + +protected: + virtual int GetNumeric(); + virtual bool IsSymbolic(char c); + virtual bool IsAlpha(char c); + + starshatter::foundation::Reader reader; + char* str; + + const char* p; + const char* eos; + + size_t index; + size_t old_index; + Token best; + size_t length; + size_t line; + size_t old_line; + size_t lineStart; +}; diff --git a/DefinitionEx/src/Parser_ss.cpp b/DefinitionEx/src/Parser_ss.cpp new file mode 100644 index 0000000..1b80d48 --- /dev/null +++ b/DefinitionEx/src/Parser_ss.cpp @@ -0,0 +1,316 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. 
+ + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the generic Parser class +*/ + +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS }; + +static int dump_tokens = 0; + +// +-------------------------------------------------------------------+ + +Term* error(const char* msg, const Token& token) +{ + static char buf[1024]; + snprintf(buf, 1024, " near '%s' in line %d", (const char*) token.symbol(), token.line()); + return error(msg, buf); +} + +// +-------------------------------------------------------------------+ + +Parser::Parser() : + lexer {nullptr} +{ + Token::addKey("true", KEY_TRUE); + Token::addKey("false", KEY_FALSE); + Token::addKey(":", KEY_DEF); + Token::addKey("-", KEY_MINUS); +} + + +Parser::Parser(starshatter::foundation::Reader r) : + lexer {new Scanner(std::move(r))} +{ + Token::addKey("true", KEY_TRUE); + Token::addKey("false", KEY_FALSE); + Token::addKey(":", KEY_DEF); + Token::addKey("-", KEY_MINUS); +} + +Parser::~Parser() +{ + if (lexer) + delete lexer; + //Token::close(); +} + +Term* +Parser::ParseTerm() +{ + Term* t = ParseTermBase(); + if (t == 0) return t; + + Term* t2 = ParseTermRest(t); + + return t2; +} + +Term* +Parser::ParseTermRest(Term* base) +{ + Token t = lexer->Get(); + + switch (t.type()) { + default: + lexer->PutBack(); + return base; + + case Token::StringLiteral: { + // concatenate adjacent string literal tokens: + TermText* text = base->isText(); + if (text) { + TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2)); + delete base; + return ParseTermRest(base2); + } + else { + lexer->PutBack(); + } + } + break; + + case Token::Keyword: + switch (t.key()) { + case KEY_DEF: + if (base->isText()) + return new TermDef(base->isText(), ParseTerm()); + else + return error("(Parse) illegal lhs in def", t); + + default: + lexer->PutBack(); + return base; + } + break; + } + + return base; +} + 
+static int xtol(const char* p) +{ + int n = 0; + + while (*p) { + char digit = *p++; + n *= 16; + + if (digit >= '0' && digit <= '9') + n += digit - '0'; + + else if (digit >= 'a' && digit <= 'f') + n += digit - 'a' + 10; + + else if (digit >= 'A' && digit <= 'F') + n += digit - 'A' + 10; + } + + return n; +} + +Term* +Parser::ParseTermBase() +{ + Token t = lexer->Get(); + int n = 0; + double d = 0.0; + + switch (t.type()) { + case Token::IntLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + // handle hex notation: + if (nstr[1] == 'x') + n = xtol(nstr+2); + + else + n = atol(nstr); + + return new TermNumber(n); + } + + case Token::FloatLiteral: { + if (dump_tokens) + Print("%s", t.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) t.symbol().length(); i++) + if (t.symbol()[i] != '_') + *p++ = t.symbol()[i]; + *p++ = '\0'; + + d = atof(nstr); + return new TermNumber(d); + } + + case Token::StringLiteral: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new TermText(t.symbol()(1, t.symbol().length()-2)); + + case Token::AlphaIdent: + if (dump_tokens) + Print("%s", t.symbol().data()); + + return new TermText(t.symbol()); + + case Token::Keyword: + if (dump_tokens) + Print("%s", t.symbol().data()); + + switch (t.key()) { + case KEY_FALSE: return new TermBool(0); + case KEY_TRUE: return new TermBool(1); + + case KEY_MINUS: { + Token next = lexer->Get(); + if (next.type() == Token::IntLiteral) { + if (dump_tokens) + Print("%s", next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + n = -1 * atol(nstr); + return new TermNumber(n); + } + else if (next.type() == Token::FloatLiteral) { + if (dump_tokens) + Print("%s", 
next.symbol().data()); + + char nstr[256], *p = nstr; + for (int i = 0; i < (int) next.symbol().length(); i++) + if (next.symbol()[i] != '_') + *p++ = next.symbol()[i]; + *p++ = '\0'; + + d = -1.0 * atof(nstr); + return new TermNumber(d); + } + else { + lexer->PutBack(); + return error("(Parse) illegal token '-': number expected", next); + } + } + break; + + default: + lexer->PutBack(); + return 0; + } + + case Token::LParen: return ParseArray(); + + case Token::LBrace: return ParseStruct(); + + case Token::CharLiteral: + return error("(Parse) illegal token ", t); + + default: + lexer->PutBack(); + return 0; + } +} + +TermArray* +Parser::ParseArray() +{ + TermList* elems = ParseTermList(0); + Token end = lexer->Get(); + + if (end.type() != Token::RParen) + return (TermArray*) error("(Parse) ')' missing in array-decl", end); + + return new TermArray(elems); +} + +TermStruct* +Parser::ParseStruct() +{ + TermList* elems = ParseTermList(1); + Token end = lexer->Get(); + + if (end.type() != Token::RBrace) + return (TermStruct*) error("(Parse) '}' missing in struct", end); + + return new TermStruct(elems); +} + +TermList* +Parser::ParseTermList(int for_struct) +{ + TermList* tlist = new TermList; + + Term* term = ParseTerm(); + while (term) { + if (for_struct && !term->isDef()) { + return (TermList*) error("(Parse) non-definition term in struct"); + } + else if (!for_struct && term->isDef()) { + return (TermList*) error("(Parse) illegal definition in array"); + } + + tlist->append(term); + Token t = lexer->Get(); + + /*** OLD WAY: COMMA SEPARATORS REQUIRED *** + if (t.type() != Token::Comma) { + lexer->PutBack(); + term = 0; + } + else + term = ParseTerm(); + /*******************************************/ + + // NEW WAY: COMMA SEPARATORS OPTIONAL: + if (t.type() != Token::Comma) { + lexer->PutBack(); + } + + term = ParseTerm(); + } + + return tlist; +} diff --git a/DefinitionEx/src/Term.cpp b/DefinitionEx/src/Term.cpp new file mode 100644 index 0000000..acfdcb8 --- 
/dev/null +++ b/DefinitionEx/src/Term.cpp @@ -0,0 +1,119 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. + + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Implementation of the Term class +*/ + +#include + +#include + +// +-------------------------------------------------------------------+ + +Term* +error(const char* s1, const char* s2) +{ + Print("ERROR: "); + if (s1) Print(s1); + if (s2) Print(s2); + Print("\n\n"); + return 0; +} + +// +-------------------------------------------------------------------+ + +void TermBool::print(int level) { if (level > 0) Print(val? "true" : "false"); else Print("..."); } +void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); } +void TermText::print(int level) { if (level > 0) Print("\"%s\"", val.data()); else Print("..."); } + +// +-------------------------------------------------------------------+ + +TermArray::TermArray(TermList* elist) +{ + elems = elist; +} + +TermArray::~TermArray() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermArray::print(int level) +{ + if (level > 1) { + Print("("); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print(") "); + } + else Print("(...) 
"); +} + +// +-------------------------------------------------------------------+ + +TermStruct::TermStruct(TermList* elist) +{ + elems = elist; +} + +TermStruct::~TermStruct() +{ + if (elems) elems->destroy(); + delete elems; +} + +void +TermStruct::print(int level) +{ + if (level > 1) { + Print("{"); + + if (elems) { + for (int i = 0; i < elems->size(); i++) { + elems->at(i)->print(level-1); + if (i < elems->size() -1) + Print(", "); + } + } + + Print("} "); + } + else Print("{...} "); +} + +// +-------------------------------------------------------------------+ + +TermDef::~TermDef() +{ + delete mname; + delete mval; +} + +void +TermDef::print(int level) +{ + if (level >= 0) { + mname->print(level); + Print(": "); + mval->print(level-1); + } + else Print("..."); +} + +// +-------------------------------------------------------------------+ diff --git a/DefinitionEx/src/Token.cpp b/DefinitionEx/src/Token.cpp new file mode 100644 index 0000000..3f516de --- /dev/null +++ b/DefinitionEx/src/Token.cpp @@ -0,0 +1,549 @@ +/* Starshatter: The Open Source Project + Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors + Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors + Copyright (c) 1997-2006, Destroyer Studios LLC. 
+ + AUTHOR: John DiCamillo + + + OVERVIEW + ======== + Scanner class implementation +*/ + +#include + +#include + +#include +#include + +#include +#include + +// +-------------------------------------------------------------------+ + +bool Token::hidecom = true; +char Token::combeg[3] = "//"; +char Token::comend[3] = "\n"; +char Token::altbeg[3] = "/*"; +char Token::altend[3] = "*/"; +Dictionary Token::keymap; + +// +-------------------------------------------------------------------+ + +Token::Token() + : mType(Undefined), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const Token& rhs) + : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn) +{ + mLength = rhs.mLength; + if (mLength < 8) { + strcpy(mSymbol, rhs.mSymbol); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } +} + +Token::Token(int t) + : mType(t), mKey(0), mLine(0), mColumn(0) +{ + mLength = 0; + mSymbol[0] = '\0'; +} + +Token::Token(const char* s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = strlen(s); + if (mLength < 8) { + strcpy(mSymbol, s); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, s); + } +} + +Token::Token(const Text& s, int t, int k, int l, int c) + : mType(t), mKey(k), mLine(l), mColumn(c) +{ + mLength = s.length(); + if (mLength < 8) { + strcpy(mSymbol, s.data()); + } + else { + mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, s.data()); + } +} + +Token::~Token() +{ + if (mLength >= 8) + delete [] mFullSymbol; +} + +// +-------------------------------------------------------------------+ + +void +Token::close() +{ + keymap.clear(); +} + +// +-------------------------------------------------------------------+ + +Token& +Token::operator = (const Token& rhs) +{ + if (mLength >= 8) + delete [] mFullSymbol; + + mLength = rhs.mLength; + if (mLength < 8) { + strcpy(mSymbol, rhs.mSymbol); + } + else { + 
mFullSymbol = new char[mLength + 1]; + strcpy(mFullSymbol, rhs.mFullSymbol); + } + + mType = rhs.mType; + mKey = rhs.mKey; + mLine = rhs.mLine; + mColumn = rhs.mColumn; + + return *this; +} + +// +-------------------------------------------------------------------+ + +bool +Token::match(const Token& ref) const +{ + if (mType == ref.mType) { // if types match + if (ref.mLength == 0) // if no symbol to match + return true; // match! + + else if (mLength == ref.mLength) { // else if symbols match + if (mLength < 8) { + if (!strcmp(mSymbol, ref.mSymbol)) + return true; // match! + } + else { + if (!strcmp(mFullSymbol, ref.mFullSymbol)) + return true; // match! + } + } + } + + return false; +} + +// +-------------------------------------------------------------------+ + +Text +Token::symbol() const +{ + if (mLength < 8) + return Text(mSymbol); + else + return Text(mFullSymbol); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKey(const Text& k, int v) +{ + keymap.insert(k, v); +} + +// +-------------------------------------------------------------------+ + +void +Token::addKeys(Dictionary& keys) +{ + DictionaryIter iter = keys; + while (++iter) + keymap.insert(iter.key(), iter.value()); +} + +// +-------------------------------------------------------------------+ + +bool +Token::findKey(const Text& k, int& v) +{ + if (keymap.contains(k)) { + v = keymap.find(k, 0); + return true; + } + else + return false; +} + +// +-------------------------------------------------------------------+ + +void +Token::comments(const Text& begin, const Text& end) +{ + combeg[0] = begin(0); + if (begin.length() > 1) combeg[1] = begin(1); + else combeg[1] = '\0'; + + comend[0] = end(0); + if (end.length() > 1) comend[1] = end(1); + else comend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +void +Token::altComments(const Text& begin, const Text& end) +{ + altbeg[0] = begin(0); + if (begin.length() > 
1) altbeg[1] = begin(1); + else altbeg[1] = '\0'; + + altend[0] = end(0); + if (end.length() > 1) altend[1] = end(1); + else altend[1] = '\0'; +} + +// +-------------------------------------------------------------------+ + +Text +Token::typestr() const +{ + Text t = "Unknown"; + switch (type()) { + case Undefined: t = "Undefined"; break; + case Keyword: t = "Keyword"; break; + case AlphaIdent: t = "AlphaIdent"; break; + case SymbolicIdent: t = "SymbolicIdent"; break; + case Comment: t = "Comment"; break; + case IntLiteral: t = "IntLiteral"; break; + case FloatLiteral: t = "FloatLiteral"; break; + case StringLiteral: t = "StringLiteral"; break; + case CharLiteral: t = "CharLiteral"; break; + case Dot: t = "Dot"; break; + case Comma: t = "Comma"; break; + case Colon: t = "Colon"; break; + case Semicolon: t = "Semicolon"; break; + case LParen: t = "LParen"; break; + case RParen: t = "RParen"; break; + case LBracket: t = "LBracket"; break; + case RBracket: t = "RBracket"; break; + case LBrace: t = "LBrace"; break; + case RBrace: t = "RBrace"; break; + case EOT: t = "EOT"; break; + case LastTokenType: t = "LastTokenType"; break; + } + + return t; +} + +// +-------------------------------------------------------------------+ + +Text +Token::describe(const Text& tok) +{ + Text d; + + switch (tok(0)) { + case '.' 
: d = "Token::Dot"; break; + case ',' : d = "Token::Comma"; break; + case ';' : d = "Token::Semicolon"; break; + case '(' : d = "Token::LParen"; break; + case ')' : d = "Token::RParen"; break; + case '[' : d = "Token::LBracket"; break; + case ']' : d = "Token::RBracket"; break; + case '{' : d = "Token::LBrace"; break; + case '}' : d = "Token::RBrace"; break; + default : break; + } + + if (d.length() == 0) { + if (isalpha(tok(0))) + d = "\"" + tok + "\", Token::AlphaIdent"; + else if (isdigit(tok(0))) { + if (tok.contains(".")) + d = "\"" + tok + "\", Token::FloatLiteral"; + else + d = "\"" + tok + "\", Token::IntLiteral"; + } + else + d = "\"" + tok + "\", Token::SymbolicIdent"; + } + + return d; +} + +// +-------------------------------------------------------------------+ + +Scanner::Scanner() : + reader {}, + str {nullptr}, + index {0}, + old_index {0}, + length {0}, + line {0}, + old_line {0}, + lineStart {0} +{ +} + + +Scanner::Scanner(starshatter::foundation::Reader r) : + reader {std::move(r)}, + str {nullptr}, + index {0}, + old_index {0}, + length {0}, + line {0}, + old_line {0}, + lineStart {0} +{ +} + + +Scanner::Scanner(const Text& s) : + reader {}, + str {nullptr}, + index {0}, + old_index {0}, + length {static_cast(s.length())}, + line {0}, + old_line {0}, + lineStart {0} +{ + str = new char [s.length() + 1]; + strcpy(str, s.data()); +} + + +Scanner::~Scanner() +{ + if (str) + delete [] str; +} + +// +-------------------------------------------------------------------+ + +void +Scanner::Load(const Text& s) +{ + delete [] str; + str = new char [s.length() + 1]; + strcpy(str, s.data()); + + index = 0; + old_index = 0; + best = Token(); + length = s.length(); + line = 0; + old_line = 0; + lineStart = 0; +} + +// +-------------------------------------------------------------------+ + +Token +Scanner::Get(Need need) +{ + int type = Token::EOT; + old_index = index; + old_line = line; + + eos = str + length; + p = str + index; + + if (p >= eos) { + if (need 
== Demand && reader.valid()) { + Load(reader.more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + while (isspace(*p) && p < eos) { // skip initial white space + if (*p == '\n') { + line++; + lineStart = p - str; + } + p++; + } + + if (p >= eos) { + if (need == Demand && reader.valid()) { + Load(reader.more()); + if (length > 0) + return Get(need); + } + return Token("", type, 0, line, 0); + } + + Token result; + size_t start = p - str; + + if (*p == '"' || *p == '\'') { // special case for quoted tokens + + if (*p == '"') type = Token::StringLiteral; + else type = Token::CharLiteral; + + char match = *p; + while (++p < eos) { + if (*p == match) { // find matching quote + if (*(p-1) != '\\') { // if not escaped + p++; // token includes matching quote + break; + } + } + } + } + + // generic delimited comments + else if (*p == Token::comBeg(0) && + (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::comEnd(0) && + (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) { + p++; if (Token::comEnd(1)) p++; + break; + } + } + } + + // alternate form delimited comments + else if (*p == Token::altBeg(0) && + (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) { + type = Token::Comment; + while (++p < eos) { + if (*p == Token::altEnd(0) && + (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) { + p++; if (Token::altEnd(1)) p++; + break; + } + } + } + + else if (*p == '.') type = Token::Dot; + else if (*p == ',') type = Token::Comma; + else if (*p == ';') type = Token::Semicolon; + else if (*p == '(') type = Token::LParen; + else if (*p == ')') type = Token::RParen; + else if (*p == '[') type = Token::LBracket; + else if (*p == ']') type = Token::RBracket; + else if (*p == '{') type = Token::LBrace; + else if (*p == '}') type = Token::RBrace; + + // use lexical sub-parser for ints and floats + else if (isdigit(*p)) + type = GetNumeric(); + + else if (IsSymbolic(*p)) { + 
type = Token::SymbolicIdent; + while (IsSymbolic(*p)) p++; + } + + else { + type = Token::AlphaIdent; + while (IsAlpha(*p)) p++; + } + + size_t extent = (p - str) - start; + + if (extent < 1) extent = 1; // always get at least one character + + index = start + extent; // advance the cursor + int col = start - lineStart; + if (line == 0) col++; + + char* buf = new char [extent + 1]; + strncpy(buf, str + start, extent); + buf[extent] = '\0'; + + if (type == Token::Comment && Token::hidecom) { + delete [] buf; + if (Token::comEnd(0) == '\n') { + line++; + lineStart = p - str; + } + return Get(need); + } + + if (type == Token::AlphaIdent || // check for keyword + type == Token::SymbolicIdent) { + int val; + if (Token::findKey(Text(buf), val)) + result = Token(buf, Token::Keyword, val, line+1, col); + } + + if (result.mType != Token::Keyword) + result = Token(buf, type, 0, line+1, col); + + if (line+1 > (size_t) best.mLine || + (line+1 == (size_t) best.mLine && col > best.mColumn)) + best = result; + + delete [] buf; + return result; +} + +// +-------------------------------------------------------------------+ + +int +Scanner::GetNumeric() +{ + int type = Token::IntLiteral; // assume int + + if (*p == '0' && *(p+1) == 'x') { // check for hex: + p += 2; + while (isxdigit(*p)) p++; + return type; + } + + while (isdigit(*p) || *p == '_') p++; // whole number part + + if (*p == '.') { p++; // optional fract part + type = Token::FloatLiteral; // implies float + + while (isdigit(*p) || *p == '_') p++; // fractional part + } + + if (*p == 'E' || *p == 'e') { p++; // optional exponent + if (*p == '+' || *p == '-') p++; // which may be signed + while (isdigit(*p)) p++; + + type = Token::FloatLiteral; // implies float + } + + return type; +} + +// +-------------------------------------------------------------------+ + +bool +Scanner::IsAlpha(char c) +{ + return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false; +} + +// 
+-------------------------------------------------------------------+ + +bool +Scanner::IsSymbolic(char c) +{ + const char* s = "+-*/\\<=>~!@#$%^&|:"; + return strchr(s, c)?true:false; +} diff --git a/DefinitionEx/test/parse_utils_like.cpp b/DefinitionEx/test/parse_utils_like.cpp new file mode 100644 index 0000000..fc943f7 --- /dev/null +++ b/DefinitionEx/test/parse_utils_like.cpp @@ -0,0 +1,106 @@ +#include + +#include +#include +#include + + +TEST(DefinitionEx, ParseUtilsLike) +{ + std::string content { + "TEST\n\n" + "number: 3\n" + "bool: true\n" + "short: hello\n" + "longer: \"hello there\"\n" + "array: (0.98, -1.0, 0.35)\n" + "struct: {\n" + " name: \"trailing comma\",\n" + "}\n" + }; + Parser parser({content.data(), content.size()}); + Term* term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + { + TermText* text = term->isText(); + ASSERT_NE(nullptr, text); + EXPECT_STREQ("TEST", text->value()); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("number", def->name()->value()); + TermNumber* num = def->term()->isNumber(); + ASSERT_NE(nullptr, num); + EXPECT_EQ(3, static_cast(num->value())); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("bool", def->name()->value()); + TermBool* boolean = def->term()->isBool(); + ASSERT_NE(nullptr, boolean); + EXPECT_TRUE(boolean->value()); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("short", def->name()->value()); + TermText* text = def->term()->isText(); + ASSERT_NE(nullptr, text); + EXPECT_STREQ("hello", text->value()); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("longer", def->name()->value()); 
+ TermText* text = def->term()->isText(); + ASSERT_NE(nullptr, text); + EXPECT_STREQ("hello there", text->value()); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("array", def->name()->value()); + TermArray* array = def->term()->isArray(); + ASSERT_NE(nullptr, array); + ASSERT_EQ(3, array->elements()->size()); + EXPECT_FLOAT_EQ(0.98, array->elements()->at(0)->isNumber()->value()); // what if not a number? + EXPECT_FLOAT_EQ(-1.0, array->elements()->at(1)->isNumber()->value()); + EXPECT_FLOAT_EQ(0.35, array->elements()->at(2)->isNumber()->value()); + } + { + delete term; + term = parser.ParseTerm(); + ASSERT_NE(nullptr, term); + TermDef* def = term->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("struct", def->name()->value()); + ASSERT_NE(nullptr, def->term()); + TermStruct* strukt = def->term()->isStruct(); + ASSERT_NE(nullptr, strukt); + ASSERT_EQ(1, strukt->elements()->size()); + def = strukt->elements()->at(0)->isDef(); + ASSERT_NE(nullptr, def); + EXPECT_STREQ("name", def->name()->value()); + TermText* text = def->term()->isText(); + ASSERT_NE(nullptr, text); + EXPECT_STREQ("trailing comma", text->value()); + } + Token::close(); +} -- cgit v1.1