summary refs log tree commit diff homepage
path: root/DefinitionEx
diff options
context:
space:
mode:
author Aki <please@ignore.pl> 2022-04-02 19:19:08 +0200
committer Aki <please@ignore.pl> 2022-04-02 20:23:41 +0200
commit 94ef3b0248485714ca8e635af3811d788ee930e2 (patch)
tree e9a9acea17e96a6ce4ce2a5dc790e9704a19dedf /DefinitionEx
parent beb4c7aa02cfe80cdfc6793406823c5f32cb0b74 (diff)
download starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.zip
starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.gz
starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.bz2
Moved def format implementation to own module
Diffstat (limited to 'DefinitionEx')
-rw-r--r-- DefinitionEx/CMakeLists.txt 15
-rw-r--r-- DefinitionEx/Parser.cpp 307
-rw-r--r-- DefinitionEx/Parser.h 45
-rw-r--r-- DefinitionEx/Term.cpp 119
-rw-r--r-- DefinitionEx/Term.h 171
-rw-r--r-- DefinitionEx/Token.cpp 544
-rw-r--r-- DefinitionEx/Token.h 145
7 files changed, 1346 insertions, 0 deletions
diff --git a/DefinitionEx/CMakeLists.txt b/DefinitionEx/CMakeLists.txt
new file mode 100644
index 0000000..9dd6620
--- /dev/null
+++ b/DefinitionEx/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Static library built from the def-format parser, term and token sources.
+project(DefinitionEx)
+add_library(DefinitionEx STATIC Parser.cpp Term.cpp Token.cpp)
+# PUBLIC: targets linking DefinitionEx also get this include directory.
+target_include_directories(DefinitionEx PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+# PUBLIC: FoundationEx is propagated to targets linking DefinitionEx.
+target_link_libraries(DefinitionEx PUBLIC FoundationEx)
diff --git a/DefinitionEx/Parser.cpp b/DefinitionEx/Parser.cpp
new file mode 100644
index 0000000..09827cf
--- /dev/null
+++ b/DefinitionEx/Parser.cpp
@@ -0,0 +1,307 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Implementation of the generic Parser class
+*/
+
+#include "Reader.h"
+#include "Token.h"
+#include "Parser.h"
+#include "Term.h"
+#include "Utils.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+// Keyword ids; the Parser constructor registers them with Token::addKey().
+enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS };
+
+// When non-zero, ParseTermBase() echoes consumed token text through Print().
+static int dump_tokens = 0;
+
+// +-------------------------------------------------------------------+
+
+// Report a parse error, appending the text and line number of the token
+// near which it happened. Forwards to the two-argument error() and returns
+// whatever that returns.
+Term* error(char* msg, const Token& token)
+{
+    static char location[1024];
+    const int where = token.line();
+    sprintf_s(location, " near '%s' in line %d.", (const char*) token.symbol(), where);
+
+    return error(msg, location);
+}
+
+// +-------------------------------------------------------------------+
+
+// Create a parser reading from `r`; when `r` is null a ConsoleReader is
+// created instead. Also registers the keywords this grammar uses in the
+// static Token key map.
+Parser::Parser(Reader* r)
+{
+    if (r)
+        reader = r;
+    else
+        reader = new ConsoleReader;
+
+    lexer = new Scanner(reader);
+
+    Token::addKey("true",  KEY_TRUE);
+    Token::addKey("false", KEY_FALSE);
+    Token::addKey(":",     KEY_DEF);
+    Token::addKey("-",     KEY_MINUS);
+}
+
+// Destroy the scanner and the reader. The reader is deleted even when it
+// was supplied by the caller of the constructor.
+Parser::~Parser()
+{
+    delete lexer;
+    delete reader;
+    // Token::close() is deliberately left disabled:
+    //Token::close();
+}
+
+// Parse one complete term: a base term plus whatever continuation follows
+// it. Returns null when no base term could be parsed.
+Term*
+Parser::ParseTerm()
+{
+    Term* base = ParseTermBase();
+    if (!base)
+        return 0;
+
+    return ParseTermRest(base);
+}
+
+// Parse what may follow an already parsed base term:
+//   - an adjacent string literal is appended to a text base,
+//   - the ':' keyword turns a text base into a definition (name: value),
+//   - any other token is pushed back and `base` is returned unchanged.
+// Takes ownership of `base`. Returns the resulting term, or null after
+// reporting an error (in which case `base` has been freed).
+Term*
+Parser::ParseTermRest(Term* base)
+{
+    Token t = lexer->Get();
+
+    switch (t.type()) {
+    case Token::StringLiteral: {
+            TermText* text = base->isText();
+            if (!text)
+                break;
+
+            // concatenate adjacent string literal tokens (quotes stripped):
+            Text sym = t.symbol();
+            TermText* joined = new TermText(text->value() + sym(1, sym.length()-2));
+            delete base;
+            return ParseTermRest(joined);
+        }
+
+    case Token::Keyword:
+        if (t.key() == KEY_DEF) {
+            if (TermText* name = base->isText())
+                return new TermDef(name, ParseTerm());
+
+            // ParseTerm() forgets its pointer once null is returned, so the
+            // rejected left-hand side has to be released here.
+            delete base;
+            return error("(Parse) illegal lhs in def", t);
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    // not a continuation of this term: leave the token for the caller
+    lexer->PutBack();
+    return base;
+}
+
+// Convert a string of hexadecimal digits (without the "0x" prefix) to int.
+// Upper and lower case digits are accepted; an empty string yields 0.
+// As before, a character that is not a hex digit still shifts the result
+// by one hex place and contributes zero.
+static int xtol(const char* p)
+{
+    // Accumulate unsigned: values above INT_MAX (e.g. "ffffffff") wrap
+    // instead of overflowing a signed int, which is undefined behaviour.
+    unsigned int n = 0;
+
+    for (; *p; ++p) {
+        const char digit = *p;
+        unsigned int value = 0;
+
+        if (digit >= '0' && digit <= '9')
+            value = digit - '0';
+        else if (digit >= 'a' && digit <= 'f')
+            value = digit - 'a' + 10;
+        else if (digit >= 'A' && digit <= 'F')
+            value = digit - 'A' + 10;
+
+        n = n * 16u + value;
+    }
+
+    return (int) n;
+}
+
+// Copy `sym` into `out` (capacity `cap`, terminator included) without the
+// '_' digit separators. Characters that do not fit are dropped, so an
+// over-long literal can no longer overrun the buffer.
+static void strip_separators(const Text& sym, char* out, size_t cap)
+{
+    const char* src = sym.data();
+    const int len = (int) sym.length();
+    size_t used = 0;
+
+    for (int i = 0; i < len && used + 1 < cap; i++) {
+        if (src[i] != '_')
+            out[used++] = src[i];
+    }
+    out[used] = '\0';
+}
+
+// Value of an IntLiteral token's text: decimal, or hex when it has an
+// "0x" style prefix (second character 'x').
+static int parse_int_literal(const Text& sym)
+{
+    char nstr[256];
+    strip_separators(sym, nstr, sizeof(nstr));
+
+    // handle hex notation:
+    if (nstr[0] != '\0' && nstr[1] == 'x')
+        return xtol(nstr+2);
+
+    return (int) atol(nstr);
+}
+
+// Value of a FloatLiteral token's text.
+static double parse_float_literal(const Text& sym)
+{
+    char nstr[256];
+    strip_separators(sym, nstr, sizeof(nstr));
+    return atof(nstr);
+}
+
+// Parse a base term: number, string, identifier, true/false, a negated
+// number, an array "( ... )" or a struct "{ ... }".
+// Returns a newly allocated term, or null when the next token cannot start
+// a term (the token is pushed back) or after an error has been reported.
+// A negated hex literal such as -0x10 now yields -16; it used to go through
+// atol() and come out as 0.
+Term*
+Parser::ParseTermBase()
+{
+    Token t = lexer->Get();
+    Text sym = t.symbol();
+
+    switch (t.type()) {
+    case Token::IntLiteral:
+        if (dump_tokens)
+            Print("%s", sym.data());
+        return new TermNumber(parse_int_literal(sym));
+
+    case Token::FloatLiteral:
+        if (dump_tokens)
+            Print("%s", sym.data());
+        return new TermNumber(parse_float_literal(sym));
+
+    case Token::StringLiteral:
+        if (dump_tokens)
+            Print("%s", sym.data());
+        // drop the surrounding quotes
+        return new TermText(sym(1, sym.length()-2));
+
+    case Token::AlphaIdent:
+        if (dump_tokens)
+            Print("%s", sym.data());
+        return new TermText(sym);
+
+    case Token::Keyword:
+        if (dump_tokens)
+            Print("%s", sym.data());
+
+        switch (t.key()) {
+        case KEY_FALSE: return new TermBool(0);
+        case KEY_TRUE:  return new TermBool(1);
+
+        case KEY_MINUS: {
+                Token next = lexer->Get();
+                Text nsym = next.symbol();
+
+                if (next.type() == Token::IntLiteral) {
+                    if (dump_tokens)
+                        Print("%s", nsym.data());
+                    // negate in double so no int value can overflow
+                    return new TermNumber(0.0 - (double) parse_int_literal(nsym));
+                }
+
+                if (next.type() == Token::FloatLiteral) {
+                    if (dump_tokens)
+                        Print("%s", nsym.data());
+                    return new TermNumber(-1.0 * parse_float_literal(nsym));
+                }
+
+                lexer->PutBack();
+                return error("(Parse) illegal token '-': number expected", next);
+            }
+
+        default:
+            lexer->PutBack();
+            return 0;
+        }
+
+    case Token::LParen: return ParseArray();
+
+    case Token::LBrace: return ParseStruct();
+
+    case Token::CharLiteral:
+        return error("(Parse) illegal token ", t);
+
+    default:
+        lexer->PutBack();
+        return 0;
+    }
+}
+
+// Parse the elements of an array after its '(' and consume the closing ')'.
+// Returns a new TermArray owning the element list, or null after reporting
+// a missing ')'.
+TermArray*
+Parser::ParseArray()
+{
+    TermList* elems = ParseTermList(0);
+    Token end = lexer->Get();
+
+    if (end.type() != Token::RParen) {
+        // nobody else references the list, so release it before failing
+        if (elems) {
+            elems->destroy();
+            delete elems;
+        }
+        return (TermArray*) error("(Parse) ')' missing in array-decl", end);
+    }
+
+    return new TermArray(elems);
+}
+
+// Parse the definitions of a struct after its '{' and consume the closing
+// '}'. Returns a new TermStruct owning the element list, or null after
+// reporting a missing '}'.
+TermStruct*
+Parser::ParseStruct()
+{
+    TermList* elems = ParseTermList(1);
+    Token end = lexer->Get();
+
+    if (end.type() != Token::RBrace) {
+        // nobody else references the list, so release it before failing
+        if (elems) {
+            elems->destroy();
+            delete elems;
+        }
+        return (TermStruct*) error("(Parse) '}' missing in struct", end);
+    }
+
+    return new TermStruct(elems);
+}
+
+// Free a partially built term list together with the term that was
+// rejected before it could be appended.
+static void discard_terms(TermList* list, Term* rejected)
+{
+    delete rejected;
+    list->destroy();
+    delete list;
+}
+
+// Parse consecutive terms until ParseTerm() yields none.
+// With `for_struct` non-zero every term must be a definition; otherwise no
+// term may be a definition. Commas between terms are optional (an older
+// revision required them). Returns the new list, or null after reporting
+// an error; on error everything parsed so far is freed.
+TermList*
+Parser::ParseTermList(int for_struct)
+{
+    TermList* tlist = new TermList;
+
+    for (Term* term = ParseTerm(); term; term = ParseTerm()) {
+        const bool is_def = term->isDef() != 0;
+
+        if (for_struct && !is_def) {
+            discard_terms(tlist, term);
+            return (TermList*) error("(Parse) non-definition term in struct");
+        }
+
+        if (!for_struct && is_def) {
+            discard_terms(tlist, term);
+            return (TermList*) error("(Parse) illegal definition in array");
+        }
+
+        tlist->append(term);
+
+        // swallow an optional comma separator
+        Token t = lexer->Get();
+        if (t.type() != Token::Comma)
+            lexer->PutBack();
+    }
+
+    return tlist;
+}
+
+
+
diff --git a/DefinitionEx/Parser.h b/DefinitionEx/Parser.h
new file mode 100644
index 0000000..84fe268
--- /dev/null
+++ b/DefinitionEx/Parser.h
@@ -0,0 +1,45 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Declaration of the generic Parser class
+*/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "Text.h"
+#include "Term.h"
+
+// +-------------------------------------------------------------------+
+
+class Reader;
+class Scanner;
+
+// +-------------------------------------------------------------------+
+
+// Parser that reads tokens from a Scanner and builds Term trees.
+// Owns both its Scanner and its Reader and deletes them on destruction.
+class Parser
+{
+public:
+    // r: token source; when null the parser creates a ConsoleReader.
+    Parser(Reader* r = 0);
+    ~Parser();
+
+    Term*       ParseTerm();
+    Term*       ParseTermBase();
+    Term*       ParseTermRest(Term* base);
+    TermList*   ParseTermList(int for_struct);
+    TermArray*  ParseArray();
+    TermStruct* ParseStruct();
+
+private:
+    // A copy would delete the same reader and scanner a second time, so
+    // the copy operations are declared here and intentionally not defined.
+    Parser(const Parser&);
+    Parser& operator = (const Parser&);
+
+    Reader*  reader;
+    Scanner* lexer;
+};
+
+#endif
diff --git a/DefinitionEx/Term.cpp b/DefinitionEx/Term.cpp
new file mode 100644
index 0000000..acd2c74
--- /dev/null
+++ b/DefinitionEx/Term.cpp
@@ -0,0 +1,119 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Implementation of the Term class
+*/
+
+
+#include "Term.h"
+#include "Utils.h"
+
+// +-------------------------------------------------------------------+
+
+// Print "ERROR: " followed by the two optional message parts and a blank
+// line. Always returns null so callers can write `return error(...)`.
+Term*
+error(char* s1, char* s2)
+{
+    Print("ERROR: ");
+    // The parts are data, not format strings: they can contain token text
+    // such as '%', which must not be interpreted by Print().
+    if (s1) Print("%s", s1);
+    if (s2) Print("%s", s2);
+    Print("\n\n");
+    return 0;
+}
+
+// +-------------------------------------------------------------------+
+
+// Scalar terms print their value while level > 0 and "..." otherwise.
+void TermBool::print(int level)
+{
+    if (level <= 0) {
+        Print("...");
+        return;
+    }
+    Print(val ? "true" : "false");
+}
+
+void TermNumber::print(int level)
+{
+    if (level <= 0) {
+        Print("...");
+        return;
+    }
+    Print("%g", val);
+}
+
+void TermText::print(int level)
+{
+    if (level <= 0) {
+        Print("...");
+        return;
+    }
+    Print("\"%s\"", val.data());
+}
+
+// +-------------------------------------------------------------------+
+
+// Takes ownership of `elist` (which may be null).
+TermArray::TermArray(TermList* elist)
+    : elems(elist)
+{ }
+
+// Destroys the list's contents via destroy(), then the list itself.
+TermArray::~TermArray()
+{
+    if (elems)
+        elems->destroy();
+
+    delete elems;
+}
+
+// Print the array as "(a, b, ...) ", passing level-1 to each element;
+// prints "(...) " instead once level has dropped to 1 or below.
+void
+TermArray::print(int level)
+{
+    if (level <= 1) {
+        Print("(...) ");
+        return;
+    }
+
+    Print("(");
+
+    if (elems) {
+        for (int i = 0; i < elems->size(); i++) {
+            if (i > 0)
+                Print(", ");
+            elems->at(i)->print(level-1);
+        }
+    }
+
+    Print(") ");
+}
+
+// +-------------------------------------------------------------------+
+
+// Takes ownership of `elist` (which may be null).
+TermStruct::TermStruct(TermList* elist)
+    : elems(elist)
+{ }
+
+// Destroys the list's contents via destroy(), then the list itself.
+TermStruct::~TermStruct()
+{
+    if (elems)
+        elems->destroy();
+
+    delete elems;
+}
+
+// Print the struct as "{a, b, ...} ", passing level-1 to each element;
+// prints "{...} " instead once level has dropped to 1 or below.
+void
+TermStruct::print(int level)
+{
+    if (level <= 1) {
+        Print("{...} ");
+        return;
+    }
+
+    Print("{");
+
+    if (elems) {
+        for (int i = 0; i < elems->size(); i++) {
+            if (i > 0)
+                Print(", ");
+            elems->at(i)->print(level-1);
+        }
+    }
+
+    Print("} ");
+}
+
+// +-------------------------------------------------------------------+
+
+// A definition owns both its name and its value term.
+TermDef::~TermDef()
+{
+    delete mval;
+    delete mname;
+}
+
+// Print the definition as "name: value", the value one level shallower;
+// prints "..." when level is negative.
+void
+TermDef::print(int level)
+{
+    if (level < 0) {
+        Print("...");
+        return;
+    }
+
+    // Either part may be null: the parser builds a TermDef from the result
+    // of ParseTerm(), which is null when no value could be parsed.
+    if (mname)
+        mname->print(level);
+    Print(": ");
+    if (mval)
+        mval->print(level-1);
+}
+
+// +-------------------------------------------------------------------+
diff --git a/DefinitionEx/Term.h b/DefinitionEx/Term.h
new file mode 100644
index 0000000..79e2fc3
--- /dev/null
+++ b/DefinitionEx/Term.h
@@ -0,0 +1,171 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Declaration of the Abstract Syntax Tree classes
+*/
+
+
+#ifndef TERM_H
+#define TERM_H
+
+#include "Text.h"
+#include "List.h"
+
+// +-------------------------------------------------------------------+
+
+class Term;
+class TermBool;
+class TermNumber;
+class TermText;
+class TermArray;
+class TermDef;
+class TermStruct;
+
+// +-------------------------------------------------------------------+
+
+// Base class of all syntax tree nodes. Every virtual has a do-nothing
+// default here; the concrete term classes override what applies to them.
+class Term
+{
+public:
+ static const char* TYPENAME() { return "Term"; }
+
+ Term() { }
+ virtual ~Term() { }
+
+ // The base implementation reports "not equal" for every pair of terms.
+ virtual int operator==(const Term& rhs) const { return 0; }
+
+ // The base implementation prints nothing.
+ virtual void print(int level=10) { }
+
+ // conversion tests
+ // Each isX() returns null here; the matching subclass returns `this`.
+ virtual Term* touch() { return this; }
+ virtual TermBool* isBool() { return 0; }
+ virtual TermNumber* isNumber() { return 0; }
+ virtual TermText* isText() { return 0; }
+ virtual TermArray* isArray() { return 0; }
+ virtual TermDef* isDef() { return 0; }
+ virtual TermStruct* isStruct() { return 0; }
+};
+
+// Prints an error message built from the two parts and returns null.
+Term* error(char*, char* = 0);
+
+// +-------------------------------------------------------------------+
+
+// Container and iterator types used for array and struct elements.
+typedef List<Term> TermList;
+typedef ListIter<Term> TermListIter;
+
+// +-------------------------------------------------------------------+
+
+// Boolean literal term.
+class TermBool : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermBool"; }
+
+ TermBool(bool v) : val(v) { }
+
+ // Prints "true"/"false" when level > 0, "..." otherwise.
+ virtual void print(int level=10);
+ virtual TermBool* isBool() { return this; }
+ bool value() const { return val; }
+
+private:
+ bool val;
+};
+
+// +-------------------------------------------------------------------+
+
+// Numeric literal term; integer and float literals are both held as double.
+class TermNumber : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermNumber"; }
+
+ TermNumber(double v) : val(v) { }
+
+ // Prints the value with "%g" when level > 0, "..." otherwise.
+ virtual void print(int level=10);
+ virtual TermNumber* isNumber() { return this; }
+ double value() const { return val; }
+
+private:
+ double val;
+};
+
+// +-------------------------------------------------------------------+
+
+// Text term; the parser creates it for string literals and identifiers.
+class TermText : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermText"; }
+
+ TermText(const Text& v) : val(v) { }
+
+ // Prints the text in double quotes when level > 0, "..." otherwise.
+ virtual void print(int level=10);
+ virtual TermText* isText() { return this; }
+ // Returns a copy of the text.
+ Text value() const { return val; }
+
+private:
+ Text val;
+};
+
+// +-------------------------------------------------------------------+
+
+// Array term "( ... )". Owns its element list: the destructor destroys the
+// list contents and deletes the list.
+class TermArray : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermArray"; }
+
+ TermArray(TermList* elist);
+ virtual ~TermArray();
+
+ virtual void print(int level=10);
+ virtual TermArray* isArray() { return this; }
+ // May be null: the constructor stores whatever pointer it was given.
+ TermList* elements() { return elems; }
+
+private:
+ TermList* elems;
+};
+
+// +-------------------------------------------------------------------+
+
+// Struct term "{ ... }". Owns its element list: the destructor destroys the
+// list contents and deletes the list.
+class TermStruct : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermStruct"; }
+
+ TermStruct(TermList* elist);
+ virtual ~TermStruct();
+
+ virtual void print(int level=10);
+
+ virtual TermStruct* isStruct() { return this; }
+ // May be null: the constructor stores whatever pointer it was given.
+ TermList* elements() { return elems; }
+
+private:
+ TermList* elems;
+};
+
+// +-------------------------------------------------------------------+
+
+// Definition term "name: value". Owns both parts; the destructor deletes
+// them.
+class TermDef : public Term
+{
+public:
+ static const char* TYPENAME() { return "TermDef"; }
+
+ // n: the name; v: the value term (the parser may pass null for v when
+ // no value could be parsed).
+ TermDef(TermText* n, Term* v) : mname(n), mval(v) { }
+ virtual ~TermDef();
+
+ virtual void print(int level=10);
+ virtual TermDef* isDef() { return this; }
+
+ virtual TermText* name() { return mname; }
+ virtual Term* term() { return mval; }
+
+private:
+ TermText* mname;
+ Term* mval;
+};
+
+#endif
diff --git a/DefinitionEx/Token.cpp b/DefinitionEx/Token.cpp
new file mode 100644
index 0000000..2cc97b5
--- /dev/null
+++ b/DefinitionEx/Token.cpp
@@ -0,0 +1,544 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Scanner class implementation
+*/
+
+#include "Token.h"
+#include "Reader.h"
+#include "Text.h"
+
+#include <ctype.h>
+
+// +-------------------------------------------------------------------+
+
+// Scanner configuration shared by all tokens and scanners.
+// hidecom: when true, Scanner::Get() skips comment tokens.
+bool Token::hidecom = true;
+// Primary comment delimiters: "//" up to the end of the line.
+char Token::combeg[3] = "//";
+char Token::comend[3] = "\n";
+// Alternate comment delimiters: "/*" up to "*/".
+char Token::altbeg[3] = "/*";
+char Token::altend[3] = "*/";
+// Keyword text -> keyword id, filled through addKey()/addKeys().
+Dictionary<int> Token::keymap;
+
+// +-------------------------------------------------------------------+
+
+// Symbol storage: text shorter than 8 characters lives in the inline
+// mSymbol buffer; longer text is held in the heap array mFullSymbol (both
+// share a union, mLength tells which one is active).
+
+// Empty, undefined token.
+Token::Token()
+    : mLength(0), mType(Undefined), mKey(0), mLine(0), mColumn(0)
+{
+    mSymbol[0] = '\0';
+}
+
+// Deep copy of `rhs`, duplicating a heap-held symbol.
+Token::Token(const Token& rhs)
+    : mLength(rhs.mLength), mType(rhs.mType), mKey(rhs.mKey),
+      mLine(rhs.mLine), mColumn(rhs.mColumn)
+{
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, rhs.mFullSymbol);
+    }
+    else {
+        strcpy_s(mSymbol, rhs.mSymbol);
+    }
+}
+
+// Token of type `t` with an empty symbol.
+Token::Token(int t)
+    : mLength(0), mType(t), mKey(0), mLine(0), mColumn(0)
+{
+    mSymbol[0] = '\0';
+}
+
+// Token with symbol `s`, type `t`, keyword id `k`, line `l`, column `c`.
+Token::Token(const char* s, int t, int k, int l, int c)
+    : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+    mLength = strlen(s);
+
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, s);
+    }
+    else {
+        strcpy_s(mSymbol, s);
+    }
+}
+
+// Same as above, taking the symbol as a Text.
+Token::Token(const Text& s, int t, int k, int l, int c)
+    : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+    mLength = s.length();
+
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, s.data());
+    }
+    else {
+        strcpy_s(mSymbol, s.data());
+    }
+}
+
+// Release the heap buffer when the symbol did not fit inline.
+Token::~Token()
+{
+    if (mLength < 8)
+        return;
+
+    delete [] mFullSymbol;
+}
+
+// +-------------------------------------------------------------------+
+
+// Remove every registered keyword from the shared key map.
+void
+Token::close()
+{
+    Token::keymap.clear();
+}
+
+// +-------------------------------------------------------------------+
+
+// Deep-copy assignment. Releases this token's heap symbol (if any) and
+// duplicates the one held by `rhs`.
+Token&
+Token::operator = (const Token& rhs)
+{
+    // Self-assignment would free the buffer and then copy from it.
+    if (this == &rhs)
+        return *this;
+
+    if (mLength >= 8)
+        delete [] mFullSymbol;
+
+    mLength = rhs.mLength;
+    if (mLength < 8) {
+        strcpy_s(mSymbol, rhs.mSymbol);
+    }
+    else {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, rhs.mFullSymbol);
+    }
+
+    mType   = rhs.mType;
+    mKey    = rhs.mKey;
+    mLine   = rhs.mLine;
+    mColumn = rhs.mColumn;
+
+    return *this;
+}
+
+// +-------------------------------------------------------------------+
+
+// True when this token has the same type as `ref` and either `ref` has an
+// empty symbol (matches any symbol) or both symbols are identical.
+bool
+Token::match(const Token& ref) const
+{
+    if (mType != ref.mType)
+        return false;
+
+    if (ref.mLength == 0)           // no symbol to match
+        return true;
+
+    if (mLength != ref.mLength)
+        return false;
+
+    // equal lengths: both symbols use the same storage form
+    if (mLength < 8)
+        return strcmp(mSymbol, ref.mSymbol) == 0;
+
+    return strcmp(mFullSymbol, ref.mFullSymbol) == 0;
+}
+
+// +-------------------------------------------------------------------+
+
+// Return the token's symbol as a Text, read from whichever storage form
+// is active.
+Text
+Token::symbol() const
+{
+    if (mLength >= 8)
+        return Text(mFullSymbol);
+
+    return Text(mSymbol);
+}
+
+// +-------------------------------------------------------------------+
+
+// Register keyword text `k` with id `v` in the shared key map.
+void
+Token::addKey(const Text& k, int v)
+{
+    Token::keymap.insert(k, v);
+}
+
+// +-------------------------------------------------------------------+
+
+// Register every entry of `keys` in the shared key map.
+void
+Token::addKeys(Dictionary<int>& keys)
+{
+    for (DictionaryIter<int> entry = keys; ++entry; )
+        keymap.insert(entry.key(), entry.value());
+}
+
+// +-------------------------------------------------------------------+
+
+// Look up keyword text `k`. On success stores its id in `v` and returns
+// true; otherwise returns false and leaves `v` untouched.
+bool
+Token::findKey(const Text& k, int& v)
+{
+    if (!keymap.contains(k))
+        return false;
+
+    v = keymap.find(k, 0);
+    return true;
+}
+
+// +-------------------------------------------------------------------+
+
+// Store the first one or two characters of `text` in `slot`; the second
+// position is set to NUL when `text` has only one character.
+static void store_delimiter(char* slot, const Text& text)
+{
+    slot[0] = text(0);
+
+    if (text.length() > 1)
+        slot[1] = text(1);
+    else
+        slot[1] = '\0';
+}
+
+// Set the primary comment delimiters (at most two characters each).
+void
+Token::comments(const Text& begin, const Text& end)
+{
+    store_delimiter(combeg, begin);
+    store_delimiter(comend, end);
+}
+
+// +-------------------------------------------------------------------+
+
+// Set the alternate comment delimiters (at most two characters each).
+void
+Token::altComments(const Text& begin, const Text& end)
+{
+    store_delimiter(altbeg, begin);
+    store_delimiter(altend, end);
+}
+
+// +-------------------------------------------------------------------+
+
+// Name of this token's type as text; "Unknown" for values outside Types.
+Text
+Token::typestr() const
+{
+    switch (type()) {
+    case Undefined:     return "Undefined";
+    case Keyword:       return "Keyword";
+    case AlphaIdent:    return "AlphaIdent";
+    case SymbolicIdent: return "SymbolicIdent";
+    case Comment:       return "Comment";
+    case IntLiteral:    return "IntLiteral";
+    case FloatLiteral:  return "FloatLiteral";
+    case StringLiteral: return "StringLiteral";
+    case CharLiteral:   return "CharLiteral";
+    case Dot:           return "Dot";
+    case Comma:         return "Comma";
+    case Colon:         return "Colon";
+    case Semicolon:     return "Semicolon";
+    case LParen:        return "LParen";
+    case RParen:        return "RParen";
+    case LBracket:      return "LBracket";
+    case RBracket:      return "RBracket";
+    case LBrace:        return "LBrace";
+    case RBrace:        return "RBrace";
+    case EOT:           return "EOT";
+    case LastTokenType: return "LastTokenType";
+    }
+
+    return "Unknown";
+}
+
+// +-------------------------------------------------------------------+
+
+// Describe the token text `tok` as source for a Token constructor call:
+// punctuation maps to its type name alone, anything else to the quoted
+// text followed by the type guessed from its first character.
+Text
+Token::describe(const Text& tok)
+{
+    // <ctype.h> functions need an unsigned char value; a plain char
+    // holding a byte >= 0x80 may be negative, which is undefined there.
+    const unsigned char first = (unsigned char) tok(0);
+
+    switch (first) {
+    case '.' : return "Token::Dot";
+    case ',' : return "Token::Comma";
+    case ';' : return "Token::Semicolon";
+    case '(' : return "Token::LParen";
+    case ')' : return "Token::RParen";
+    case '[' : return "Token::LBracket";
+    case ']' : return "Token::RBracket";
+    case '{' : return "Token::LBrace";
+    case '}' : return "Token::RBrace";
+    default  : break;
+    }
+
+    if (isalpha(first))
+        return "\"" + tok + "\", Token::AlphaIdent";
+
+    if (isdigit(first)) {
+        if (tok.contains("."))
+            return "\"" + tok + "\", Token::FloatLiteral";
+
+        return "\"" + tok + "\", Token::IntLiteral";
+    }
+
+    return "\"" + tok + "\", Token::SymbolicIdent";
+}
+
+// +-------------------------------------------------------------------+
+
+// Scanner that pulls its text from `r` on demand; no text is loaded yet.
+Scanner::Scanner(Reader* r)
+    : reader(r), str(0), p(0), eos(0), index(0), old_index(0),
+      length(0), line(0), old_line(0), lineStart(0)
+{ }
+
+// Copy of `rhs` with its own duplicate of the text buffer. As before,
+// old_line starts at 0 and `best` is not copied.
+Scanner::Scanner(const Scanner& rhs)
+    : reader(rhs.reader), str(0), p(0), eos(0),
+      index(rhs.index), old_index(rhs.old_index), length(rhs.length),
+      line(rhs.line), old_line(0), lineStart(rhs.lineStart)
+{
+    // rhs.str is null for a scanner that has not loaded any text yet
+    if (rhs.str) {
+        str = new char [strlen(rhs.str) + 1];
+        strcpy(str, rhs.str);
+    }
+}
+
+// Scanner over the fixed text `s`, without a reader.
+Scanner::Scanner(const Text& s)
+    : reader(0), str(0), p(0), eos(0), index(0), old_index(0),
+      length(s.length()), line(0), old_line(0), lineStart(0)
+{
+    str = new char [s.length() + 1];
+    strcpy(str, s.data());
+}
+
+// Frees the text buffer; the reader is not deleted here.
+Scanner::~Scanner()
+{
+    delete [] str;
+}
+
+// +-------------------------------------------------------------------+
+
+// Copy the text buffer and cursor state of `rhs`. As before, `reader` and
+// `best` keep their current values.
+Scanner&
+Scanner::operator = (const Scanner& rhs)
+{
+    // Self-assignment would free the buffer and then copy from it.
+    if (this == &rhs)
+        return *this;
+
+    // Duplicate first; rhs.str is null when rhs has not loaded any text.
+    char* copy = 0;
+    if (rhs.str) {
+        copy = new char [strlen(rhs.str) + 1];
+        strcpy(copy, rhs.str);
+    }
+
+    delete [] str;
+    str = copy;
+
+    index     = rhs.index;
+    old_index = rhs.old_index;
+    length    = rhs.length;
+    line      = rhs.line;
+    old_line  = rhs.old_line;
+    lineStart = rhs.lineStart;
+
+    return *this;
+}
+
+// +-------------------------------------------------------------------+
+
+// Replace the scanned text with a copy of `s` and rewind all cursor, line
+// and best-token state.
+void
+Scanner::Load(const Text& s)
+{
+    delete [] str;
+    str = new char [s.length() + 1];
+    strcpy(str, s.data());
+    length = s.length();
+
+    index = old_index = 0;
+    line  = old_line  = 0;
+    lineStart = 0;
+    best = Token();
+}
+
+// +-------------------------------------------------------------------+
+
+// Scan and return the next token.
+// When the text is exhausted and `need` is Demand, more text is loaded
+// from the reader (if any); otherwise an EOT token is returned. Comment
+// tokens are skipped while Token::hidecom is set. The position before the
+// call is remembered so PutBack() can undo one Get().
+Token
+Scanner::Get(Need need)
+{
+    int type = Token::EOT;
+    old_index = index;
+    old_line  = line;
+
+    eos = str + length;
+    p   = str + index;
+
+    // skip initial white space; the bound is tested before dereferencing
+    // and the character is passed to isspace() as unsigned char
+    while (p < eos && isspace((unsigned char) *p)) {
+        if (*p == '\n') {
+            line++;
+            lineStart = p - str;
+        }
+        p++;
+    }
+
+    if (p >= eos) {
+        if (need == Demand && reader) {
+            Load(reader->more());
+            if (length > 0)
+                return Get(need);
+        }
+        return Token("", type, 0, line, 0);
+    }
+
+    Token  result;
+    size_t start = p - str;
+
+    if (*p == '"' || *p == '\'') {   // special case for quoted tokens
+
+        if (*p == '"') type = Token::StringLiteral;
+        else           type = Token::CharLiteral;
+
+        char match = *p;
+        while (++p < eos) {
+            if (*p == match) {         // find matching quote
+                if (*(p-1) != '\\') {  // if not escaped
+                    p++;               // token includes matching quote
+                    break;
+                }
+            }
+        }
+    }
+
+    // generic delimited comments
+    else if (*p == Token::comBeg(0) &&
+            (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) {
+        type = Token::Comment;
+        while (++p < eos) {
+            if (*p == Token::comEnd(0) &&
+                    (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) {
+                p++; if (Token::comEnd(1)) p++;
+                break;
+            }
+        }
+    }
+
+    // alternate form delimited comments
+    else if (*p == Token::altBeg(0) &&
+            (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) {
+        type = Token::Comment;
+        while (++p < eos) {
+            if (*p == Token::altEnd(0) &&
+                    (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) {
+                p++; if (Token::altEnd(1)) p++;
+                break;
+            }
+        }
+    }
+
+    else if (*p == '.') type = Token::Dot;
+    else if (*p == ',') type = Token::Comma;
+    else if (*p == ';') type = Token::Semicolon;
+    else if (*p == '(') type = Token::LParen;
+    else if (*p == ')') type = Token::RParen;
+    else if (*p == '[') type = Token::LBracket;
+    else if (*p == ']') type = Token::RBracket;
+    else if (*p == '{') type = Token::LBrace;
+    else if (*p == '}') type = Token::RBrace;
+
+    // use lexical sub-parser for ints and floats
+    else if (isdigit((unsigned char) *p))
+        type = GetNumeric();
+
+    else if (IsSymbolic(*p)) {
+        type = Token::SymbolicIdent;
+        // bounded: strchr() also matches the terminator, so without the
+        // p < eos test a trailing symbol run walked off the buffer
+        while (p < eos && IsSymbolic(*p)) p++;
+    }
+
+    else {
+        type = Token::AlphaIdent;
+        while (p < eos && IsAlpha(*p)) p++;
+    }
+
+    size_t extent = (p - str) - start;
+
+    if (extent < 1) extent = 1;      // always get at least one character
+
+    index = start + extent;          // advance the cursor
+    int col = start - lineStart;
+    if (line == 0) col++;
+
+    // hidden comments are dropped before any token text is copied
+    if (type == Token::Comment && Token::hidecom) {
+        if (Token::comEnd(0) == '\n') {
+            line++;
+            lineStart = p - str;
+        }
+        return Get(need);
+    }
+
+    char* buf = new char [extent + 1];
+    strncpy(buf, str + start, extent);
+    buf[extent] = '\0';
+
+    if (type == Token::AlphaIdent ||   // check for keyword
+            type == Token::SymbolicIdent) {
+        int val;
+        if (Token::findKey(Text(buf), val))
+            result = Token(buf, Token::Keyword, val, line+1, col);
+    }
+
+    if (result.mType != Token::Keyword)
+        result = Token(buf, type, 0, line+1, col);
+
+    // remember the token furthest into the text seen so far
+    if (line+1 > (size_t) best.mLine ||
+            (line+1 == (size_t) best.mLine && col > best.mColumn))
+        best = result;
+
+    delete [] buf;
+    return result;
+}
+
+// +-------------------------------------------------------------------+
+
+// Advance `p` over a numeric literal and return its token type:
+// IntLiteral for hex ("0x...") and plain digit runs, FloatLiteral when a
+// fraction or an exponent is present. '_' is accepted as a digit separator
+// in the whole and fractional parts.
+int
+Scanner::GetNumeric()
+{
+    // ctype functions are called with unsigned char values throughout;
+    // a negative plain char is undefined behaviour for them.
+    if (*p == '0' && *(p+1) == 'x') {              // check for hex:
+        p += 2;
+        while (isxdigit((unsigned char) *p)) p++;
+        return Token::IntLiteral;
+    }
+
+    int type = Token::IntLiteral;                  // assume int
+
+    while (isdigit((unsigned char) *p) || *p == '_') p++;   // whole number part
+
+    if (*p == '.') {                               // optional fract part
+        p++;
+        type = Token::FloatLiteral;                // implies float
+
+        while (isdigit((unsigned char) *p) || *p == '_') p++;   // fractional part
+    }
+
+    if (*p == 'E' || *p == 'e') {                  // optional exponent
+        p++;
+        if (*p == '+' || *p == '-') p++;           // which may be signed
+        while (isdigit((unsigned char) *p)) p++;
+
+        type = Token::FloatLiteral;                // implies float
+    }
+
+    return type;
+}
+
+// +-------------------------------------------------------------------+
+
+// True when `c` may appear in an alphanumeric identifier: a letter, a
+// digit or '_'.
+bool
+Scanner::IsAlpha(char c)
+{
+    // Test the argument itself (the old code ignored it and read *p, which
+    // only worked because Get() always passes *p). The cast keeps the
+    // ctype calls defined for bytes >= 0x80.
+    const unsigned char ch = (unsigned char) c;
+    return isalpha(ch) || isdigit(ch) || ch == '_';
+}
+
+// +-------------------------------------------------------------------+
+
+// True when `c` is one of the characters that can form a symbolic
+// identifier.
+bool
+Scanner::IsSymbolic(char c)
+{
+    // strchr() also finds the string's terminating NUL, so '\0' must be
+    // rejected explicitly or the end of the scanned text looks symbolic.
+    if (c == '\0')
+        return false;
+
+    const char* s = "+-*/\\<=>~!@#$%^&|:";
+    return strchr(s, c) ? true : false;
+}
diff --git a/DefinitionEx/Token.h b/DefinitionEx/Token.h
new file mode 100644
index 0000000..bd3723b
--- /dev/null
+++ b/DefinitionEx/Token.h
@@ -0,0 +1,145 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Scanner class definition
+*/
+
+#ifndef Token_h
+#define Token_h
+
+#include "Text.h"
+#include "Dictionary.h"
+
+#pragma warning( disable : 4237)
+
+// +-------------------------------------------------------------------+
+
+class Reader;
+class Token;
+class Scanner;
+
+// +-------------------------------------------------------------------+
+
+// A lexical token: symbol text, type, keyword id and source position.
+// Also holds the static scanner configuration (comment delimiters,
+// comment hiding, keyword map) shared by all scanners.
+class Token
+{
+ friend class Scanner;
+
+public:
+ // keywords must be alphanumeric identifiers or symbolic identifiers
+ enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment,
+ IntLiteral, FloatLiteral, StringLiteral, CharLiteral,
+ Dot, Comma, Colon, Semicolon,
+ LParen, RParen, LBracket, RBracket, LBrace, RBrace,
+ EOT, LastTokenType };
+
+ // Alternative names for some token types.
+ enum Alias { CompoundSeparator = Dot,
+ ItemSeparator = Comma,
+ StatementTerminator = Semicolon,
+ TypeIndicator = Colon,
+ Lambda = LastTokenType + 1 };
+
+ // s: symbol text, t: type, k: keyword id, l: line, c: column.
+ Token();
+ Token(const Token& rhs);
+ Token(int t);
+ Token(const char* s, int t, int k=0, int l=0, int c=0);
+ Token(const Text& s, int t, int k=0, int l=0, int c=0);
+ ~Token();
+
+ Token& operator = (const Token& rhs);
+
+ // True when the types agree and ref's symbol is empty or identical.
+ bool match(const Token& ref) const;
+
+ Text symbol() const;
+ int type() const { return mType; }
+ int key() const { return mKey; }
+ int line() const { return mLine; }
+ int column() const { return mColumn; }
+
+ Text typestr() const;
+
+ static Text describe(const Text& tok);
+ static void addKey(const Text& k, int v);
+ static void addKeys(Dictionary<int>& keys);
+ static bool findKey(const Text& k, int& v);
+ static void comments(const Text& begin, const Text& end);
+ static void altComments(const Text& begin, const Text& end);
+ static void hideComments(bool hide = true) { hidecom = hide; }
+
+ // Character i of a comment delimiter; i is not range-checked against
+ // the 3-element arrays.
+ static char comBeg(unsigned int i) { return combeg[i]; }
+ static char comEnd(unsigned int i) { return comend[i]; }
+ static char altBeg(unsigned int i) { return altbeg[i]; }
+ static char altEnd(unsigned int i) { return altend[i]; }
+
+ // Clears the keyword map.
+ static void close();
+
+protected:
+ // Symbol length; values below 8 select mSymbol, others mFullSymbol.
+ int mLength;
+ union {
+ char mSymbol[8];
+ char* mFullSymbol;
+ };
+ int mType;
+ int mKey;
+ int mLine;
+ int mColumn;
+
+ static bool hidecom;
+ static char combeg[3];
+ static char comend[3];
+ static char altbeg[3];
+ static char altend[3];
+
+ static Dictionary<int> keymap;
+};
+
+// +-------------------------------------------------------------------+
+
+// Lexer that splits a text buffer into Tokens. The text comes either from
+// a Text passed in directly or, on demand, from a Reader.
+class Scanner
+{
+public:
+ Scanner(Reader* r = 0);
+ Scanner(const Text& s);
+ Scanner(const Scanner& rhs);
+ virtual ~Scanner();
+
+ Scanner& operator = (const Scanner& rhs);
+
+ // Replaces the scanned text and resets the cursor and line state.
+ void Load(const Text& s);
+
+ // Demand: Get() loads more text from the reader when the buffer is
+ // exhausted. Request: Get() returns an EOT token instead.
+ enum Need { Demand, Request };
+ virtual Token Get(Need n = Demand);
+
+ // Restores the position saved by the most recent Get().
+ void PutBack() { index = old_index; line = old_line; }
+ int GetCursor() { return index; }
+ int GetLine() { return line; }
+ void Reset(int c, int l) { index = old_index = c; line = old_line = l; }
+ // The token furthest into the text returned so far.
+ Token Best() const { return best; }
+
+protected:
+ virtual int GetNumeric();
+ virtual bool IsSymbolic(char c);
+ virtual bool IsAlpha(char c);
+
+ Reader* reader;
+ // Buffer holding the text being scanned (deleted by the destructor).
+ char* str;
+
+ // Scan cursor and end of text; both are set at the start of Get().
+ const char* p;
+ const char* eos;
+
+ size_t index;
+ size_t old_index;
+ Token best;
+ size_t length;
+ size_t line;
+ size_t old_line;
+ size_t lineStart;
+};
+
+#endif // Token_h