Moved def format implementation to own module

author: Aki <please@ignore.pl> 2022-04-02 19:19:08 +0200
committer: Aki <please@ignore.pl> 2022-04-02 20:23:41 +0200
commit: 94ef3b0248485714ca8e635af3811d788ee930e2 (patch)
tree: e9a9acea17e96a6ce4ce2a5dc790e9704a19dedf /DefinitionEx
parent: beb4c7aa02cfe80cdfc6793406823c5f32cb0b74 (diff)
download: starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.zip
starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.gz
starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.bz2
7 files changed, 1346 insertions, 0 deletions
diff --git a/DefinitionEx/CMakeLists.txt b/DefinitionEx/CMakeLists.txt
new file mode 100644
index 0000000..9dd6620
--- /dev/null
+++ b/DefinitionEx/CMakeLists.txt
@@ -0,0 +1,15 @@
+project(DefinitionEx)  # static library built from the parser, term and token sources below
+add_library(
+	DefinitionEx STATIC
+	Parser.cpp
+	Term.cpp
+	Token.cpp
+	)
+target_include_directories(
+	DefinitionEx
+	PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}  # PUBLIC: targets linking DefinitionEx also get this directory on their include path
+	)
+target_link_libraries(
+	DefinitionEx
+	PUBLIC FoundationEx  # presumably supplies Text.h, List.h, Dictionary.h, Reader.h and Utils.h - TODO confirm
+	)
diff --git a/DefinitionEx/Parser.cpp b/DefinitionEx/Parser.cpp
new file mode 100644
index 0000000..09827cf
--- /dev/null
+++ b/DefinitionEx/Parser.cpp
@@ -0,0 +1,307 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Implementation of the generic Parser class
+*/
+
+#include "Reader.h"
+#include "Token.h"
+#include "Parser.h"
+#include "Term.h"
+#include "Utils.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS };  // keyword ids registered via Token::addKey in Parser::Parser
+// Debug switch: when non-zero, ParseTermBase prints the symbol of each token it consumes.
+static int dump_tokens = 0;
+
+// +-------------------------------------------------------------------+
+
+Term* error(char* msg, const Token& token)  // reports msg followed by " near '<symbol>' in line <n>."
+{
+    static char where[1024];
+    Text sym = token.symbol();
+    sprintf_s(where, " near '%s' in line %d.", (const char*) sym, token.line());
+    return error(msg, where);
+}
+
+// +-------------------------------------------------------------------+
+
+Parser::Parser(Reader* r)   // the parser deletes its reader in ~Parser; a ConsoleReader is created when r is null
+{
+    if (!r) r = new ConsoleReader;
+    reader = r;
+    lexer  = new Scanner(reader);
+    Token::addKey("true",   KEY_TRUE);    // keywords go into Token's static key map
+    Token::addKey("false",  KEY_FALSE);
+    Token::addKey(":",      KEY_DEF);
+    Token::addKey("-",      KEY_MINUS);
+}
+
+Parser::~Parser()   // releases the scanner and the reader created or adopted by the constructor
+{
+    delete lexer;
+    delete reader;
+    //Token::close();   (disabled: the static keyword map is not cleared here)
+}
+
+Term*
+Parser::ParseTerm()
+{
+    // A term is a base term that ParseTermRest may extend (adjacent string
+    // literals are concatenated, "name : value" becomes a definition).
+    Term* head = ParseTermBase();
+    if (!head)
+        return head;
+    return ParseTermRest(head);
+}
+
+Term*
+Parser::ParseTermRest(Term* base)
+{
+    // Extends an already parsed base term. Takes over base: the result is
+    // base itself, a term that replaces it, or 0 after an error was reported.
+    Token    t = lexer->Get();
+
+    switch (t.type()) {
+    default:
+        lexer->PutBack();
+        return base;
+
+    case Token::StringLiteral: {
+            // concatenate adjacent string literal tokens:
+            TermText* text  = base->isText();
+            if (text) {
+                // (1, length-2) presumably drops the quotes kept in the symbol - TODO confirm
+                TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2));
+                delete base;
+                return ParseTermRest(base2);
+            }
+            lexer->PutBack();
+        }
+        break;
+
+    case Token::Keyword:
+        if (t.key() != KEY_DEF) {
+            lexer->PutBack();
+            return base;
+        }
+        if (base->isText())
+            return new TermDef(base->isText(), ParseTerm());
+
+        // Only text may name a definition. The rejected base is unreachable
+        // once 0 is returned, so release it here instead of leaking it.
+        delete base;
+        return error("(Parse) illegal lhs in def", t);
+    }
+
+    return base;
+}
+
+static int xtol(const char* p)
+{
+    // Accumulates hex digits base 16; any other character contributes 0
+    // but still shifts the running value by one hex place.
+    int value = 0;
+    for (; *p; ++p) {
+        const char c = *p;
+        int nibble = 0;
+
+        if (c >= '0' && c <= '9')
+            nibble = c - '0';
+        else if (c >= 'a' && c <= 'f')
+            nibble = c - 'a' + 10;
+        else if (c >= 'A' && c <= 'F')
+            nibble = c - 'A' + 10;
+
+        value = value * 16 + nibble;
+    }
+    return value;
+}
+
+// Copies a numeric token symbol into out, dropping the '_' digit separators.
+// At most cap-1 characters are stored and out is always NUL-terminated, so an
+// over-long literal is truncated instead of overrunning the caller's buffer.
+static void copy_number(Text sym, char* out, int cap)
+{
+    char*     p   = out;
+    const int len = (int) sym.length();
+
+    for (int i = 0; i < len && p < out + cap - 1; i++)
+        if (sym[i] != '_')
+            *p++ = sym[i];
+    *p = '\0';
+}
+
+// Parses one self-contained term from the token stream: an int or float
+// literal, a string literal, an identifier, true/false, a negated number,
+// an array "( ... )" or a struct "{ ... }".
+// Returns a newly allocated Term, or 0 when the next token cannot start a
+// term (that token is put back) or after a parse error was reported.
+Term*
+Parser::ParseTermBase()
+{
+    Token    t = lexer->Get();
+    char     nstr[256];             // literal text with '_' separators removed
+
+    switch (t.type()) {
+    case Token::IntLiteral: {
+        if (dump_tokens)
+            Print("%s", t.symbol().data());
+
+        copy_number(t.symbol(), nstr, (int) sizeof(nstr));
+
+        // handle hex notation ("0x..."); the value is stored as int
+        int n = 0;
+        if (nstr[1] == 'x')
+            n = xtol(nstr+2);
+        else
+            n = atol(nstr);
+
+        return new TermNumber(n);
+        }
+
+    case Token::FloatLiteral:
+        if (dump_tokens)
+            Print("%s", t.symbol().data());
+
+        copy_number(t.symbol(), nstr, (int) sizeof(nstr));
+        return new TermNumber(atof(nstr));
+
+    case Token::StringLiteral:
+        if (dump_tokens)
+            Print("%s", t.symbol().data());
+
+        // the scanner keeps the quotes in the symbol; presumably (1, length-2)
+        // selects the text between them - TODO confirm Text::operator()
+        return new TermText(t.symbol()(1, t.symbol().length()-2));
+
+    case Token::AlphaIdent:
+        if (dump_tokens)
+            Print("%s", t.symbol().data());
+
+        return new TermText(t.symbol());
+
+    case Token::Keyword:
+        if (dump_tokens)
+            Print("%s", t.symbol().data());
+
+        switch (t.key()) {
+        case KEY_FALSE:   return new TermBool(0);
+        case KEY_TRUE:    return new TermBool(1);
+
+        case KEY_MINUS: {
+                // unary minus is only accepted directly in front of a number
+                Token next = lexer->Get();
+                const bool is_int   = next.type() == Token::IntLiteral;
+                const bool is_float = next.type() == Token::FloatLiteral;
+
+                if (!is_int && !is_float) {
+                    lexer->PutBack();
+                    return error("(Parse) illegal token '-': number expected", next);
+                }
+
+                if (dump_tokens)
+                    Print("%s", next.symbol().data());
+
+                copy_number(next.symbol(), nstr, (int) sizeof(nstr));
+
+                if (is_int) {
+                    // NOTE(review): hex notation is not recognised after '-';
+                    // atol() stops converting at the 'x'.
+                    int n = -1 * atol(nstr);
+                    return new TermNumber(n);
+                }
+
+                return new TermNumber(-1.0 * atof(nstr));
+            }
+
+        default:
+            lexer->PutBack();
+            return 0;
+        }
+
+    case Token::LParen:  return ParseArray();
+
+    case Token::LBrace:  return ParseStruct();
+
+    case Token::CharLiteral:
+        return error("(Parse) illegal token ", t);
+
+    default:
+        lexer->PutBack();
+        return 0;
+    }
+}
+
+TermArray*
+Parser::ParseArray()    // called after '(' was consumed; returns 0 if ')' is missing
+{
+    TermList*   elems = ParseTermList(0);
+    Token       end = lexer->Get();
+    if (end.type() == Token::RParen)
+        return new TermArray(elems);
+    // no TermArray takes over the list on this path, so free it before bailing out
+    if (elems) { elems->destroy(); delete elems; }
+    return (TermArray*) error("(Parse) ')' missing in array-decl", end);
+}
+
+TermStruct*
+Parser::ParseStruct()   // called after '{' was consumed; returns 0 if '}' is missing
+{
+    TermList*   elems = ParseTermList(1);
+    Token       end = lexer->Get();
+    if (end.type() == Token::RBrace)
+        return new TermStruct(elems);
+    // no TermStruct takes over the list on this path, so free it before bailing out
+    if (elems) { elems->destroy(); delete elems; }
+    return (TermStruct*) error("(Parse) '}' missing in struct", end);
+}
+
+// Parses a run of terms, with optional commas between them, until no further
+// term can be parsed. When for_struct is non-zero every element must be a
+// definition ("name: value"); otherwise definitions are rejected.
+//
+// Returns a new list holding the parsed terms, or 0 after reporting an error.
+TermList*
+Parser::ParseTermList(int for_struct)
+{
+    TermList*   tlist = new TermList;
+
+    for (Term* term = ParseTerm(); term; term = ParseTerm()) {
+        const bool is_def = term->isDef() != 0;
+
+        if ((for_struct != 0) != is_def) {
+            // Neither the rejected term nor the partial list reaches the
+            // caller, so release both here instead of leaking them.
+            delete term;
+            tlist->destroy();
+            delete tlist;
+
+            if (for_struct)
+                return (TermList*) error("(Parse) non-definition term in struct");
+            return (TermList*) error("(Parse) illegal definition in array");
+        }
+
+        tlist->append(term);
+
+        // comma separators are optional: swallow one if it is there,
+        // otherwise hand the token back for the next ParseTerm()
+        Token t = lexer->Get();
+        if (t.type() != Token::Comma)
+            lexer->PutBack();
+    }
+
+    return tlist;
+}
+
+
+
diff --git a/DefinitionEx/Parser.h b/DefinitionEx/Parser.h
new file mode 100644
index 0000000..84fe268
--- /dev/null
+++ b/DefinitionEx/Parser.h
@@ -0,0 +1,45 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Declaration of the generic Parser class
+*/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "Text.h"
+#include "Term.h"
+
+// +-------------------------------------------------------------------+
+
+class Reader;
+class Scanner;
+
+// +-------------------------------------------------------------------+
+
+class Parser   // parser that builds Term trees from the tokens of a Scanner
+{
+public:
+    Parser(Reader* r = 0);      // with r == 0 the constructor creates a ConsoleReader
+    ~Parser();                  // deletes both the scanner and the reader
+
+    Term*          ParseTerm();                     // base term plus optional continuation; 0 if none
+    Term*          ParseTermBase();                 // literal, identifier, boolean, array or struct
+    Term*          ParseTermRest(Term* base);       // string concatenation or "name : value" definition
+    TermList*      ParseTermList(int for_struct);   // for_struct != 0: every element must be a definition
+    TermArray*     ParseArray();                    // elements up to the closing ')'
+    TermStruct*    ParseStruct();                   // definitions up to the closing '}'
+
+private:
+    Reader*        reader;      // input source handed to the scanner
+    Scanner*       lexer;       // tokenizer created in the constructor
+};
+
+#endif
diff --git a/DefinitionEx/Term.cpp b/DefinitionEx/Term.cpp
new file mode 100644
index 0000000..acd2c74
--- /dev/null
+++ b/DefinitionEx/Term.cpp
@@ -0,0 +1,119 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Implementation of the Term class
+*/
+
+
+#include "Term.h"
+#include "Utils.h"
+
+// +-------------------------------------------------------------------+
+
+Term*
+error(char* s1, char* s2)       // logs "ERROR: <s1><s2>"; always returns 0
+{
+    Print("ERROR: ");
+    if (s1) Print("%s", s1);     // pass as data, never as the format string:
+    if (s2) Print("%s", s2);     // s2 may contain '%' taken from parsed input
+    Print("\n\n");
+    return 0;
+}
+
+// +-------------------------------------------------------------------+
+
+void TermBool::print(int level)  { if (level <= 0) { Print("..."); return; } Print(val ? "true" : "false"); }  // "..." once level drops to 0
+void TermNumber::print(int level){ if (level <= 0) { Print("..."); return; } Print("%g", val); }  // "..." once level drops to 0
+void TermText::print(int level)  { if (level <= 0) { Print("..."); return; } Print("\"%s\"", val.data()); }  // text is printed in double quotes
+
+// +-------------------------------------------------------------------+
+
+TermArray::TermArray(TermList* elist)
+    : elems(elist)          // stored as given (may be null); released in ~TermArray
+{
+}
+
+TermArray::~TermArray()             // releases the element list handed to the constructor
+{
+    if (elems) elems->destroy();    // presumably deletes the terms held by the list - confirm in List.h
+    delete elems;                   // then the list object itself
+}
+
+void
+TermArray::print(int level)
+{
+    // At level 1 or below the contents are elided as "(...) ".
+    if (level <= 1) {
+        Print("(...) ");
+        return;
+    }
+
+    Print("(");
+    const int count = elems ? elems->size() : 0;
+    for (int n = 0; n < count; n++) {
+        if (n > 0)
+            Print(", ");    // separator between elements, none after the last
+        elems->at(n)->print(level-1);
+    }
+    Print(") ");
+}
+
+// +-------------------------------------------------------------------+
+
+TermStruct::TermStruct(TermList* elist)
+    : elems(elist)          // stored as given (may be null); released in ~TermStruct
+{
+}
+
+TermStruct::~TermStruct()           // releases the element list handed to the constructor
+{
+    if (elems) elems->destroy();    // presumably deletes the terms held by the list - confirm in List.h
+    delete elems;                   // then the list object itself
+}
+
+void
+TermStruct::print(int level)
+{
+    // At level 1 or below the contents are elided as "{...} ".
+    if (level <= 1) {
+        Print("{...} ");
+        return;
+    }
+
+    Print("{");
+    const int count = elems ? elems->size() : 0;
+    for (int n = 0; n < count; n++) {
+        if (n > 0)
+            Print(", ");    // separator between elements, none after the last
+        elems->at(n)->print(level-1);
+    }
+    Print("} ");
+}
+
+// +-------------------------------------------------------------------+
+
+TermDef::~TermDef()     // a definition deletes both of its parts
+{
+    delete mname;       // the name term
+    delete mval;        // the value term
+}
+
+void
+TermDef::print(int level)      // prints "name: value", or "..." when level is negative
+{
+    if (level < 0) { Print("..."); return; }
+
+    // either part may be null, e.g. when the parser found no value after ':'
+    if (mname) mname->print(level);
+    Print(": ");
+    if (mval)  mval->print(level-1);
+}
+
+// +-------------------------------------------------------------------+
diff --git a/DefinitionEx/Term.h b/DefinitionEx/Term.h
new file mode 100644
index 0000000..79e2fc3
--- /dev/null
+++ b/DefinitionEx/Term.h
@@ -0,0 +1,171 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Declaration of the Abstract Syntax Tree classes
+*/
+
+
+#ifndef TERM_H
+#define TERM_H
+
+#include "Text.h"
+#include "List.h"
+
+// +-------------------------------------------------------------------+
+
+class Term;
+class TermBool;
+class TermNumber;
+class TermText;
+class TermArray;
+class TermDef;
+class TermStruct;
+
+// +-------------------------------------------------------------------+
+
+class Term   // base class of all syntax tree nodes
+{
+public:
+    static const char* TYPENAME() { return "Term"; }
+
+     Term()            { }
+    virtual ~Term()   { }   // virtual, so terms can be deleted through a Term*
+
+    virtual int operator==(const Term& rhs) const { return 0; }   // base implementation never reports equality
+
+     virtual void print(int level=10) { }   // base implementation prints nothing
+
+     // conversion tests: each returns 0 here and is overridden to return this by the matching subclass
+     virtual Term*              touch()     { return this; }
+     virtual TermBool*          isBool()    { return 0; }
+     virtual TermNumber*        isNumber()  { return 0; }
+     virtual TermText*          isText()    { return 0; }
+     virtual TermArray*         isArray()   { return 0; }
+     virtual TermDef*           isDef()     { return 0; }
+     virtual TermStruct*        isStruct()  { return 0; }
+};
+
+Term* error(char*, char* = 0);   // prints "ERROR: " plus both strings and returns 0 (defined in Term.cpp)
+
+// +-------------------------------------------------------------------+
+
+typedef List<Term>      TermList;       // element container used by TermArray and TermStruct
+typedef ListIter<Term>  TermListIter;
+
+// +-------------------------------------------------------------------+
+
+class TermBool : public Term   // boolean term (the parser creates it for the true/false keywords)
+{
+public:
+    static const char* TYPENAME() { return "TermBool"; }
+
+    TermBool(bool v) : val(v) { }
+
+    virtual void      print(int level=10);   // "true"/"false" when level > 0, otherwise "..."
+     virtual TermBool* isBool()      { return this; }
+              bool      value() const { return val;  }
+
+private:
+    bool val;
+};
+
+// +-------------------------------------------------------------------+
+
+class TermNumber : public Term   // numeric term; int and float literals are both stored as double
+{
+public:
+    static const char* TYPENAME() { return "TermNumber"; }
+
+    TermNumber(double v) : val(v) { }
+
+    virtual void         print(int level=10);   // "%g" formatting when level > 0, otherwise "..."
+     virtual TermNumber*  isNumber()     { return this; }
+              double       value() const  { return val;  }
+
+private:
+    double val;
+};
+
+// +-------------------------------------------------------------------+
+
+class TermText : public Term   // text term (the parser creates it for string literals and identifiers)
+{
+public:
+    static const char* TYPENAME() { return "TermText"; }
+
+    TermText(const Text& v) : val(v)   { }
+
+    virtual void      print(int level=10);   // quoted text when level > 0, otherwise "..."
+     virtual TermText* isText()       { return this; }
+              Text      value() const  { return val;  }   // returns a copy of the text
+
+private:
+    Text val;
+};
+
+// +-------------------------------------------------------------------+
+
+class TermArray : public Term   // "( ... )" term holding a list of elements
+{
+public:
+    static const char* TYPENAME() { return "TermArray"; }
+
+    TermArray(TermList* elist);     // keeps elist; the destructor destroys and deletes it
+    virtual ~TermArray();
+
+    virtual void         print(int level=10);   // elements when level > 1, otherwise "(...) "
+    virtual TermArray*   isArray()   { return this;  }
+              TermList*    elements()  { return elems; }   // may be null
+
+private:
+    TermList*   elems;
+};
+
+// +-------------------------------------------------------------------+
+
+class TermStruct : public Term   // "{ ... }" term holding a list of elements
+{
+public:
+    static const char* TYPENAME() { return "TermStruct"; }
+
+    TermStruct(TermList* elist);    // keeps elist; the destructor destroys and deletes it
+    virtual ~TermStruct();
+
+    virtual void         print(int level=10);   // elements when level > 1, otherwise "{...} "
+
+    virtual TermStruct*  isStruct()  { return this; }
+              TermList*    elements()  { return elems; }   // may be null
+
+private:
+    TermList*   elems;
+};
+
+// +-------------------------------------------------------------------+
+
+class TermDef : public Term   // "name : value" definition
+{
+public:
+    static const char* TYPENAME() { return "TermDef"; }
+
+    TermDef(TermText* n, Term* v) : mname(n), mval(v) { }   // both pointers are deleted by the destructor
+    virtual ~TermDef();
+
+    virtual void         print(int level=10);   // "name: value" when level >= 0, otherwise "..."
+    virtual TermDef*     isDef()     { return this; }
+
+    virtual TermText*    name()      { return mname; }
+    virtual Term*        term()      { return mval;  }   // the value part of the definition
+
+private:
+    TermText*   mname;
+    Term*       mval;
+};
+
+#endif
diff --git a/DefinitionEx/Token.cpp b/DefinitionEx/Token.cpp
new file mode 100644
index 0000000..2cc97b5
--- /dev/null
+++ b/DefinitionEx/Token.cpp
@@ -0,0 +1,544 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Scanner class implementation
+*/
+
+#include "Token.h"
+#include "Reader.h"
+#include "Text.h"
+
+#include <ctype.h>
+
+// +-------------------------------------------------------------------+
+
+bool        Token::hidecom   = true;     // when true, Scanner::Get skips comment tokens instead of returning them
+char        Token::combeg[3] = "//";     // primary comment start delimiter
+char        Token::comend[3] = "\n";     // primary comment end delimiter
+char        Token::altbeg[3] = "/*";     // alternate comment start delimiter
+char        Token::altend[3] = "*/";     // alternate comment end delimiter
+Dictionary<int>   Token::keymap;         // keyword text -> key value, filled by addKey/addKeys
+
+// +-------------------------------------------------------------------+
+
+Token::Token()                           // Undefined token with an empty symbol
+    : mType(Undefined), mKey(0), mLine(0), mColumn(0)
+{
+    mLength    = 0;
+    mSymbol[0] = '\0';
+}
+
+Token::Token(const Token& rhs)
+    : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn)
+{
+    // symbols of 8 or more characters get their own heap copy (mFullSymbol),
+    // shorter ones are copied into mSymbol
+    mLength = rhs.mLength;
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, rhs.mFullSymbol);
+    }
+    else strcpy_s(mSymbol, rhs.mSymbol);
+}
+
+Token::Token(int t)                      // token of type t with an empty symbol
+    : mType(t), mKey(0), mLine(0), mColumn(0)
+{
+    mLength    = 0;
+    mSymbol[0] = '\0';
+}
+
+Token::Token(const char* s, int t, int k, int l, int c)
+    : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+    // symbols of 8 or more characters get their own heap copy (mFullSymbol),
+    // shorter ones are copied into mSymbol
+    mLength = strlen(s);
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, s);
+    }
+    else strcpy_s(mSymbol, s);
+}
+
+Token::Token(const Text& s, int t, int k, int l, int c)
+    : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+    // symbols of 8 or more characters get their own heap copy (mFullSymbol),
+    // shorter ones are copied into mSymbol
+    mLength = s.length();
+    if (mLength >= 8) {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, s.data());
+    }
+    else strcpy_s(mSymbol, s.data());
+}
+
+Token::~Token()
+{
+    if (mLength >= 8)               // only symbols of 8+ characters were heap-allocated
+        delete [] mFullSymbol;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::close()          // empties the static keyword map
+{
+    keymap.clear();
+}
+
+// +-------------------------------------------------------------------+
+
+Token&
+Token::operator = (const Token& rhs)   // deep-copies the symbol and all token attributes
+{
+    // self-assignment: the delete below would free the buffer we then copy from
+    if (this == &rhs) return *this;
+
+    if (mLength >= 8)
+        delete [] mFullSymbol;
+    mLength = rhs.mLength;
+    if (mLength < 8) {
+        strcpy_s(mSymbol, rhs.mSymbol);
+    }
+    else {
+        mFullSymbol = new char[mLength + 1];
+        strcpy(mFullSymbol, rhs.mFullSymbol);
+    }
+    mType   = rhs.mType;
+    mKey    = rhs.mKey;
+    mLine   = rhs.mLine;
+    mColumn = rhs.mColumn;
+    return *this;
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Token::match(const Token& ref) const
+{
+    // A token matches a reference token of the same type when the
+    // reference carries no symbol (it then acts as a wildcard) or when
+    // both symbols are equal.
+    if (mType != ref.mType)
+        return false;
+
+    if (ref.mLength == 0)
+        return true;
+
+    if (mLength != ref.mLength)
+        return false;
+
+    // equal lengths, so both symbols use the same kind of storage
+    if (mLength < 8)
+        return strcmp(mSymbol, ref.mSymbol) == 0;
+
+    return strcmp(mFullSymbol, ref.mFullSymbol) == 0;
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::symbol() const
+{
+    // short symbols are read from mSymbol, longer ones from mFullSymbol
+    const char* chars = (mLength < 8) ? mSymbol : mFullSymbol;
+
+    return Text(chars);
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::addKey(const Text& k, int v)     // registers keyword text k with key value v in the static map
+{
+    keymap.insert(k, v);
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::addKeys(Dictionary<int>& keys)   // copies every entry of keys into the static keyword map
+{
+    // ++it both advances and tests the iterator, so it is the loop condition
+    for (DictionaryIter<int> it = keys; ++it; )
+        keymap.insert(it.key(), it.value());
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Token::findKey(const Text& k, int& v)
+{
+    // v is only written when k is a registered keyword
+    if (!keymap.contains(k))
+        return false;
+
+    v = keymap.find(k, 0);
+    return true;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::comments(const Text& begin, const Text& end)
+{
+    // Only the first two characters of each delimiter are kept; a
+    // one-character delimiter is stored with '\0' in the second slot.
+    combeg[0] = begin(0);
+    combeg[1] = (begin.length() > 1) ? begin(1) : '\0';
+
+    comend[0] = end(0);
+    comend[1] = (end.length() > 1) ? end(1) : '\0';
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::altComments(const Text& begin, const Text& end)
+{
+    // Only the first two characters of each delimiter are kept; a
+    // one-character delimiter is stored with '\0' in the second slot.
+    altbeg[0] = begin(0);
+    altbeg[1] = (begin.length() > 1) ? begin(1) : '\0';
+
+    altend[0] = end(0);
+    altend[1] = (end.length() > 1) ? end(1) : '\0';
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::typestr() const
+{
+    const char* name = "Unknown";   // kept for values outside the Types enum
+    switch (type()) {
+    case Undefined:      name = "Undefined";     break;
+    case Keyword:        name = "Keyword";       break;
+    case AlphaIdent:     name = "AlphaIdent";    break;
+    case SymbolicIdent:  name = "SymbolicIdent"; break;
+    case Comment:        name = "Comment";       break;
+    case IntLiteral:     name = "IntLiteral";    break;
+    case FloatLiteral:   name = "FloatLiteral";  break;
+    case StringLiteral:  name = "StringLiteral"; break;
+    case CharLiteral:    name = "CharLiteral";   break;
+    case Dot:            name = "Dot";           break;
+    case Comma:          name = "Comma";         break;
+    case Colon:          name = "Colon";         break;
+    case Semicolon:      name = "Semicolon";     break;
+    case LParen:         name = "LParen";        break;
+    case RParen:         name = "RParen";        break;
+    case LBracket:       name = "LBracket";      break;
+    case RBracket:       name = "RBracket";      break;
+    case LBrace:         name = "LBrace";        break;
+    case RBrace:         name = "RBrace";        break;
+    case EOT:            name = "EOT";           break;
+    case LastTokenType:  name = "LastTokenType"; break;
+    }
+
+    return Text(name);
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::describe(const Text& tok)
+{
+    // Punctuation is described by its token type alone; anything else is
+    // described as the quoted text followed by the type guessed from its
+    // first character.
+    switch (tok(0)) {
+    case '.' : return Text("Token::Dot");
+    case ',' : return Text("Token::Comma");
+    case ';' : return Text("Token::Semicolon");
+    case '(' : return Text("Token::LParen");
+    case ')' : return Text("Token::RParen");
+    case '[' : return Text("Token::LBracket");
+    case ']' : return Text("Token::RBracket");
+    case '{' : return Text("Token::LBrace");
+    case '}' : return Text("Token::RBrace");
+    default  : break;
+    }
+
+    // not punctuation: classify by the first character
+    Text d;
+
+    if (isalpha(tok(0)))
+        d = "\"" + tok + "\", Token::AlphaIdent";
+    else if (!isdigit(tok(0)))
+        d = "\"" + tok + "\", Token::SymbolicIdent";
+    else if (tok.contains("."))
+        d = "\"" + tok + "\", Token::FloatLiteral";
+    else
+        d = "\"" + tok + "\", Token::IntLiteral";
+
+    return d;
+}
+
+// +-------------------------------------------------------------------+
+
+Scanner::Scanner(Reader* r)                       // scanner fed by r; starts with no text buffer (str is null)
+    : reader(r), str(0), index(0), old_index(0),
+      length(0), line(0), old_line(0), lineStart(0)
+{ }
+
+Scanner::Scanner(const Scanner& rhs)              // copies cursor state and duplicates the text buffer
+    : index(rhs.index), old_index(rhs.old_index), length(rhs.length),
+      reader(rhs.reader), str(0),
+      line(rhs.line), old_line(rhs.old_line), lineStart(rhs.lineStart)   // old_line copied, as operator= does
+{
+    if (!rhs.str) return;   // a scanner built from a Reader has no buffer until Load() runs
+    str = strcpy(new char [strlen(rhs.str) + 1], rhs.str);
+}
+
+Scanner::Scanner(const Text& s)                   // scanner over a private copy of s, with no reader attached
+    : reader(0), index(0), old_index(0), length(s.length()), line(0),
+      old_line(0), lineStart(0)
+{
+    str = new char [s.length() + 1];              // +1 for the terminating NUL
+    strcpy(str, s.data());
+}
+
+Scanner::~Scanner()     // frees the text buffer; the reader is not deleted here
+{
+    delete [] str;
+}
+
+// +-------------------------------------------------------------------+
+
+Scanner&
+Scanner::operator = (const Scanner& rhs)
+{
+    if (this == &rhs) return *this;     // otherwise str is freed before it is copied
+
+    delete [] str;
+    str = rhs.str ? strcpy(new char [strlen(rhs.str) + 1], rhs.str) : 0;  // rhs may have no buffer yet
+    index     = rhs.index;
+    old_index = rhs.old_index;
+    length    = rhs.length;
+    line      = rhs.line;
+    old_line  = rhs.old_line;
+    lineStart = rhs.lineStart;          // NOTE(review): reader and best are not assigned - confirm intent
+
+    return *this;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Scanner::Load(const Text& s)
+{
+    // swap the buffer for a private, NUL-terminated copy of s
+    delete [] str;
+    str = new char [s.length() + 1];
+    strcpy(str, s.data());
+    length = s.length();
+
+    // rewind the cursor, the line tracking and the furthest-token record
+    index     = old_index = 0;
+    line      = old_line  = 0;
+    lineStart = 0;
+    best      = Token();
+}
+
+// +-------------------------------------------------------------------+
+
+Token
+Scanner::Get(Need need)   // returns the next token; an EOT token once the text (and reader) is exhausted
+{
+    int   type = Token::EOT;
+    old_index  = index;   // remember the cursor and line as they were before this token
+    old_line   = line;
+
+    eos = str + length;
+    p   = str + index;
+
+    if (p >= eos) {
+        if (need == Demand && reader) {   // buffer used up: ask the reader for more text
+            Load(reader->more());
+            if (length > 0)
+                return Get(need);
+        }
+        return Token("", type, 0, line, 0);
+    }
+
+    while (isspace(*p) && p < eos) { // skip initial white space
+        if (*p == '\n') {
+            line++;
+            lineStart = p - str;
+        }
+        p++;
+    }
+
+    if (p >= eos) {                   // only white space was left: same refill logic as above
+        if (need == Demand && reader) {
+            Load(reader->more());
+            if (length > 0)
+                return Get(need);
+        }
+        return Token("", type, 0, line, 0);
+    }
+
+    Token  result;
+    size_t start = p - str;
+
+    if (*p == '"' || *p == '\'') {   // special case for quoted tokens
+
+        if (*p == '"') type = Token::StringLiteral;
+        else           type = Token::CharLiteral;
+
+        char match = *p;
+        while (++p < eos) {
+            if (*p == match) {         // find matching quote
+                if (*(p-1) != '\\') {   // if not escaped
+                    p++;                 // token includes matching quote
+                    break;
+                }
+            }
+        }
+    }
+
+    // generic delimited comments
+    else if (*p == Token::comBeg(0) &&
+                     (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) {
+        type = Token::Comment;
+        while (++p < eos) {
+            if (*p == Token::comEnd(0) &&
+                      (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) {
+                p++; if (Token::comEnd(1)) p++;
+                break;
+            }
+        }
+    }
+
+    // alternate form delimited comments
+    else if (*p == Token::altBeg(0) &&
+                     (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) {
+        type = Token::Comment;
+        while (++p < eos) {
+            if (*p == Token::altEnd(0) &&
+                      (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) {
+                p++; if (Token::altEnd(1)) p++;
+                break;
+            }
+        }
+    }
+
+    else if (*p == '.')  type = Token::Dot;       // single-character punctuation: p is not advanced here,
+    else if (*p == ',')  type = Token::Comma;     // the "at least one character" rule below consumes it
+    else if (*p == ';')  type = Token::Semicolon;
+    else if (*p == '(')  type = Token::LParen;
+    else if (*p == ')')  type = Token::RParen;
+    else if (*p == '[')  type = Token::LBracket;
+    else if (*p == ']')  type = Token::RBracket;
+    else if (*p == '{')  type = Token::LBrace;
+    else if (*p == '}')  type = Token::RBrace;
+
+    // use lexical sub-parser for ints and floats
+    else if (isdigit(*p))
+        type = GetNumeric();
+
+    else if (IsSymbolic(*p)) {
+        type = Token::SymbolicIdent;
+        while (IsSymbolic(*p)) p++;
+    }
+
+    else {
+        type = Token::AlphaIdent;
+        while (IsAlpha(*p)) p++;
+    }
+
+    size_t extent = (p - str) - start;
+
+    if (extent < 1) extent = 1;      // always get at least one character
+
+    index  = start + extent;         // advance the cursor
+    int col = start - lineStart;
+    if (line == 0) col++;
+
+    char* buf = new char [extent + 1];   // NUL-terminated copy of the token text
+    strncpy(buf, str + start, extent);
+    buf[extent] = '\0';
+
+    if (type == Token::Comment && Token::hidecom) {   // hidden comment: drop it and scan again
+        delete [] buf;
+        if (Token::comEnd(0) == '\n') {   // NOTE(review): also taken for alternate-form comments - confirm line counting
+            line++;
+            lineStart = p - str;
+        }
+        return Get(need);
+    }
+
+    if (type == Token::AlphaIdent || // check for keyword
+         type == Token::SymbolicIdent) {
+         int val;
+         if (Token::findKey(Text(buf), val))
+            result = Token(buf, Token::Keyword, val, line+1, col);
+    }
+
+    if (result.mType != Token::Keyword)
+        result = Token(buf, type, 0, line+1, col);
+
+    if (line+1 >  (size_t) best.mLine ||              // track the token furthest into the text in best
+        (line+1 == (size_t) best.mLine && col > best.mColumn))
+        best = result;
+
+    delete [] buf;
+    return result;
+}
+
+// +-------------------------------------------------------------------+
+
+int
+Scanner::GetNumeric()
+{
+    // Advances p past one numeric literal and reports whether it was an
+    // IntLiteral or a FloatLiteral.
+    if (p[0] == '0' && p[1] == 'x') {             // hex: always an int
+        for (p += 2; isxdigit(*p); p++) { }
+        return Token::IntLiteral;
+    }
+
+    bool is_float = false;
+
+    while (isdigit(*p) || *p == '_') p++;         // whole number part
+
+    if (*p == '.') {                              // optional fraction implies float
+        is_float = true;
+        do { p++; } while (isdigit(*p) || *p == '_');
+    }
+
+    if (*p == 'E' || *p == 'e') {                 // optional exponent implies float
+        is_float = true;
+        p++;
+        if (*p == '+' || *p == '-') p++;          // which may be signed
+        while (isdigit(*p)) p++;
+    }
+
+    return is_float ? Token::FloatLiteral : Token::IntLiteral;
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Scanner::IsAlpha(char c)   // true for letters, digits and '_'; tests the argument c, not the cursor *p
+{
+    return isalpha((unsigned char) c) || isdigit((unsigned char) c) || c == '_';
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Scanner::IsSymbolic(char c)   // true when c is one of the operator characters listed in s
+{
+    const char* s = "+-*/\\<=>~!@#$%^&|:";
+    return c != '\0' && strchr(s, c) != 0;   // strchr also matches s's terminating NUL; reject it so Get() stops at the buffer end
+}
diff --git a/DefinitionEx/Token.h b/DefinitionEx/Token.h
new file mode 100644
index 0000000..bd3723b
--- /dev/null
+++ b/DefinitionEx/Token.h
@@ -0,0 +1,145 @@
+/*  Starshatter: The Open Source Project
+    Copyright (c) 2021-2022, Starshatter: The Open Source Project Contributors
+    Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+    Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+     AUTHOR:       John DiCamillo
+
+
+     OVERVIEW
+     ========
+     Token and Scanner class definitions
+*/
+
+#ifndef Token_h
+#define Token_h
+
+#include "Text.h"
+#include "Dictionary.h"
+
+#pragma warning( disable : 4237)
+
+// +-------------------------------------------------------------------+
+
+class Reader;
+class Token;
+class Scanner;
+
+// +-------------------------------------------------------------------+
+
+// One lexical unit produced by Scanner: the symbol text plus a type
+// (see Types), a keyword value, and the line and column it was read at.
+//
+// The class also carries static state shared by all tokens: the keyword
+// table (addKey/addKeys/findKey) and the comment delimiters that are
+// read back through comBeg/comEnd/altBeg/altEnd.
+class Token
+{
+    // Scanner reads mType, mLine and mColumn directly.
+    friend class Scanner;
+
+public:
+    // keywords must be alphanumeric identifiers or symbolic identifiers
+    enum Types { Undefined, Keyword, AlphaIdent, SymbolicIdent, Comment,
+                     IntLiteral, FloatLiteral, StringLiteral, CharLiteral,
+                     Dot, Comma, Colon, Semicolon,
+                     LParen, RParen, LBracket, RBracket, LBrace, RBrace,
+                     EOT, LastTokenType };
+
+    // Alternative names for some punctuation types.  Lambda lies outside
+    // the Types range (LastTokenType + 1).
+    enum Alias { CompoundSeparator   = Dot,
+                     ItemSeparator       = Comma,
+                     StatementTerminator = Semicolon,
+                     TypeIndicator       = Colon,
+                     Lambda              = LastTokenType + 1 };
+
+    // s: symbol text, t: type, k: keyword value, l: line, c: column.
+    Token();
+    Token(const Token& rhs);
+    Token(int t);
+    Token(const char* s, int t, int k=0, int l=0, int c=0);
+    Token(const Text& s, int t, int k=0, int l=0, int c=0);
+    ~Token();
+
+    Token& operator = (const Token& rhs);
+
+    // NOTE(review): comparison rules are defined out of line - confirm
+    // there which fields take part in a match.
+    bool     match(const Token& ref) const;
+
+    Text     symbol()   const;
+    int      type()     const { return mType;   }
+    int      key()      const { return mKey;    }
+    int      line()     const { return mLine;   }
+    int      column()   const { return mColumn; }
+
+    Text     typestr()  const;
+
+    static Text   describe(const Text& tok);
+
+    // Keyword table.  Scanner::Get() calls findKey() on identifier text
+    // and emits a Keyword token carrying the value stored in `v`.
+    static void   addKey(const Text& k, int v);
+    static void   addKeys(Dictionary<int>& keys);
+    static bool   findKey(const Text& k, int& v);
+
+    // NOTE(review): presumably these install the primary and alternate
+    // comment delimiters - defined out of line, confirm there.
+    static void   comments(const Text& begin, const Text& end);
+    static void   altComments(const Text& begin, const Text& end);
+    static void   hideComments(bool hide = true) { hidecom = hide; }
+
+    // Character `i` of the respective comment delimiter.  Each delimiter
+    // buffer holds three chars; an index outside the buffer yields '\0'
+    // instead of reading past the end of the array.
+    static char   comBeg(unsigned int i) { return i < sizeof(combeg) ? combeg[i] : '\0'; }
+    static char   comEnd(unsigned int i) { return i < sizeof(comend) ? comend[i] : '\0'; }
+    static char   altBeg(unsigned int i) { return i < sizeof(altbeg) ? altbeg[i] : '\0'; }
+    static char   altEnd(unsigned int i) { return i < sizeof(altend) ? altend[i] : '\0'; }
+
+    // NOTE(review): presumably releases the static keyword table -
+    // defined out of line, confirm there.
+    static void   close();
+
+protected:
+    int      mLength;
+    // Symbol storage; the two members share memory.
+    // NOTE(review): presumably mLength decides which member is active
+    // (short text inline in mSymbol, longer text through mFullSymbol) -
+    // confirm against the constructors.
+    union {
+        char  mSymbol[8];
+        char* mFullSymbol;
+    };
+    int      mType;
+    int      mKey;
+    int      mLine;
+    int      mColumn;
+
+    static bool             hidecom;
+    static char             combeg[3];
+    static char             comend[3];
+    static char             altbeg[3];
+    static char             altend[3];
+
+    static Dictionary<int> keymap;
+};
+
+// +-------------------------------------------------------------------+
+
+// Splits a text buffer into Tokens.  Get() returns the next token;
+// PutBack() and Reset() move the read position back.
+class Scanner
+{
+public:
+    Scanner(Reader* r = 0);
+    Scanner(const Text&  s);
+    Scanner(const Scanner& rhs);
+    virtual ~Scanner();
+
+    Scanner& operator = (const Scanner& rhs);
+
+    void           Load(const Text& s);
+
+    // NOTE(review): the difference between Demand and Request is decided
+    // inside Get() - confirm there.
+    enum Need { Demand, Request };
+    virtual Token  Get(Need n = Demand);
+
+    // Restores the position saved in old_index / old_line.
+    void           PutBack()           { index = old_index; line = old_line; }
+
+    // Current read offset and line.  Both are stored as size_t and are
+    // narrowed to int here; the accessors do not modify the scanner.
+    int            GetCursor() const   { return static_cast<int>(index); }
+    int            GetLine()   const   { return static_cast<int>(line);  }
+
+    // Moves to offset `c` on line `l` and makes that position the one a
+    // following PutBack() returns to.
+    void           Reset(int c, int l) { index = old_index = c; line = old_line = l; }
+
+    // Token furthest into the input (by line, then by column) that Get()
+    // has returned so far.
+    Token          Best() const        { return best;      }
+
+protected:
+    // Character classification hooks used while scanning; virtual so a
+    // derived scanner can replace them.
+    virtual int    GetNumeric();
+    virtual bool   IsSymbolic(char c);
+    virtual bool   IsAlpha(char c);
+
+    Reader*     reader;     // NOTE(review): presumably supplies more input - confirm
+    char*       str;        // text being scanned
+
+    const char* p;          // scan cursor
+    const char* eos;        // NOTE(review): presumably end of the text in str - confirm
+
+    size_t      index;      // current read offset
+    size_t      old_index;  // offset restored by PutBack()
+    Token       best;       // see Best()
+    size_t      length;
+    size_t      line;       // zero-based; Get() stores line+1 in tokens
+    size_t      old_line;   // line restored by PutBack()
+    size_t      lineStart;  // set by Get() to p - str when line is incremented
+};
+
+#endif // Token_h
author	Aki <please@ignore.pl>	2022-04-02 19:19:08 +0200
committer	Aki <please@ignore.pl>	2022-04-02 20:23:41 +0200
commit	94ef3b0248485714ca8e635af3811d788ee930e2 (patch)
tree	e9a9acea17e96a6ce4ce2a5dc790e9704a19dedf /DefinitionEx
parent	beb4c7aa02cfe80cdfc6793406823c5f32cb0b74 (diff)
download	starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.zip starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.gz starshatter-94ef3b0248485714ca8e635af3811d788ee930e2.tar.bz2