author      Aki <please@ignore.pl>  2024-03-19 22:56:52 +0100
committer   Aki <please@ignore.pl>  2024-03-19 22:56:52 +0100
commit      f5b8091ee91b8323b8e2b1044ba8be676f2bfaf4 (patch)
tree        bbcfc73dd8db75579127686f9cd07e9c45d374d0 /DefinitionEx/src
parent      2066e4911948d11cac5a234d2f7773dc5f06ba96 (diff)
download    starshatter-f5b8091ee91b8323b8e2b1044ba8be676f2bfaf4.zip
            starshatter-f5b8091ee91b8323b8e2b1044ba8be676f2bfaf4.tar.gz
            starshatter-f5b8091ee91b8323b8e2b1044ba8be676f2bfaf4.tar.bz2
Definition Parser now consumes from the new Reader
Next steps are to move the ParseUtils functionality into DefinitionEx, sort things into the namespaces in this library, and then prepare it to handle overloading the way nlohmann_json does. On the other side, it's time for starshatter::data.
Diffstat (limited to 'DefinitionEx/src')
-rw-r--r--   DefinitionEx/src/Parser_ss.cpp   316
-rw-r--r--   DefinitionEx/src/Term.cpp        119
-rw-r--r--   DefinitionEx/src/Token.cpp       549
3 files changed, 984 insertions, 0 deletions
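For reference, a minimal sketch of how a caller might drive the new entry point described in the commit message: the Parser takes a starshatter::foundation::Reader and hands back Term trees one at a time. DumpDefinitions is a hypothetical name, the Reader is assumed to be constructed elsewhere (its construction is not part of this commit), and deleting through the Term* assumes the base class has a virtual destructor.

    // Hypothetical caller; Reader construction is not shown in this commit.
    #include <utility>

    #include <Parser_ss.h>
    #include <Term.h>
    #include <starshatter/foundation/reader.h>

    void DumpDefinitions(starshatter::foundation::Reader source)
    {
        Parser parser(std::move(source));    // Parser builds its own Scanner from the Reader

        Term* term = parser.ParseTerm();     // returns 0 once the input is exhausted
        while (term) {
            term->print(2);                  // dump a couple of nesting levels
            delete term;                     // assumes Term's destructor is virtual
            term = parser.ParseTerm();
        }
    }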
diff --git a/DefinitionEx/src/Parser_ss.cpp b/DefinitionEx/src/Parser_ss.cpp
new file mode 100644
index 0000000..1b80d48
--- /dev/null
+++ b/DefinitionEx/src/Parser_ss.cpp
@@ -0,0 +1,316 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Implementation of the generic Parser class
+*/
+
+#include <Parser_ss.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <utility>
+
+#include <starshatter/foundation/reader.h>
+#include <Utils.h>
+
+#include <Term.h>
+#include <Token.h>
+
+enum KEYS { KEY_TRUE, KEY_FALSE, KEY_DEF, KEY_MINUS };
+
+static int dump_tokens = 0;
+
+// +-------------------------------------------------------------------+
+
+Term* error(const char* msg, const Token& token)
+{
+ static char buf[1024];
+ snprintf(buf, 1024, " near '%s' in line %d", (const char*) token.symbol(), token.line());
+ return error(msg, buf);
+}
+
+// +-------------------------------------------------------------------+
+
+Parser::Parser() :
+ lexer {nullptr}
+{
+ Token::addKey("true", KEY_TRUE);
+ Token::addKey("false", KEY_FALSE);
+ Token::addKey(":", KEY_DEF);
+ Token::addKey("-", KEY_MINUS);
+}
+
+
+Parser::Parser(starshatter::foundation::Reader r) :
+ lexer {new Scanner(std::move(r))}
+{
+ Token::addKey("true", KEY_TRUE);
+ Token::addKey("false", KEY_FALSE);
+ Token::addKey(":", KEY_DEF);
+ Token::addKey("-", KEY_MINUS);
+}
+
+Parser::~Parser()
+{
+ if (lexer)
+ delete lexer;
+ //Token::close();
+}
+
+Term*
+Parser::ParseTerm()
+{
+ Term* t = ParseTermBase();
+ if (t == 0) return t;
+
+ Term* t2 = ParseTermRest(t);
+
+ return t2;
+}
+
+Term*
+Parser::ParseTermRest(Term* base)
+{
+ Token t = lexer->Get();
+
+ switch (t.type()) {
+ default:
+ lexer->PutBack();
+ return base;
+
+ case Token::StringLiteral: {
+ // concatenate adjacent string literal tokens:
+ TermText* text = base->isText();
+ if (text) {
+ TermText* base2 = new TermText(text->value() + t.symbol()(1, t.symbol().length()-2));
+ delete base;
+ return ParseTermRest(base2);
+ }
+ else {
+ lexer->PutBack();
+ }
+ }
+ break;
+
+ case Token::Keyword:
+ switch (t.key()) {
+ case KEY_DEF:
+ if (base->isText())
+ return new TermDef(base->isText(), ParseTerm());
+ else
+ return error("(Parse) illegal lhs in def", t);
+
+ default:
+ lexer->PutBack();
+ return base;
+ }
+ break;
+ }
+
+ return base;
+}
+
+static int xtol(const char* p)
+{
+ int n = 0;
+
+ while (*p) {
+ char digit = *p++;
+ n *= 16;
+
+ if (digit >= '0' && digit <= '9')
+ n += digit - '0';
+
+ else if (digit >= 'a' && digit <= 'f')
+ n += digit - 'a' + 10;
+
+ else if (digit >= 'A' && digit <= 'F')
+ n += digit - 'A' + 10;
+ }
+
+ return n;
+}
+
+Term*
+Parser::ParseTermBase()
+{
+ Token t = lexer->Get();
+ int n = 0;
+ double d = 0.0;
+
+ switch (t.type()) {
+ case Token::IntLiteral: {
+ if (dump_tokens)
+ Print("%s", t.symbol().data());
+
+ char nstr[256], *p = nstr;
+ for (int i = 0; i < (int) t.symbol().length(); i++)
+ if (t.symbol()[i] != '_')
+ *p++ = t.symbol()[i];
+ *p++ = '\0';
+
+ // handle hex notation:
+ if (nstr[1] == 'x')
+ n = xtol(nstr+2);
+
+ else
+ n = atol(nstr);
+
+ return new TermNumber(n);
+ }
+
+ case Token::FloatLiteral: {
+ if (dump_tokens)
+ Print("%s", t.symbol().data());
+
+ char nstr[256], *p = nstr;
+ for (int i = 0; i < (int) t.symbol().length(); i++)
+ if (t.symbol()[i] != '_')
+ *p++ = t.symbol()[i];
+ *p++ = '\0';
+
+ d = atof(nstr);
+ return new TermNumber(d);
+ }
+
+ case Token::StringLiteral:
+ if (dump_tokens)
+ Print("%s", t.symbol().data());
+
+ return new TermText(t.symbol()(1, t.symbol().length()-2));
+
+ case Token::AlphaIdent:
+ if (dump_tokens)
+ Print("%s", t.symbol().data());
+
+ return new TermText(t.symbol());
+
+ case Token::Keyword:
+ if (dump_tokens)
+ Print("%s", t.symbol().data());
+
+ switch (t.key()) {
+ case KEY_FALSE: return new TermBool(0);
+ case KEY_TRUE: return new TermBool(1);
+
+ case KEY_MINUS: {
+ Token next = lexer->Get();
+ if (next.type() == Token::IntLiteral) {
+ if (dump_tokens)
+ Print("%s", next.symbol().data());
+
+ char nstr[256], *p = nstr;
+ for (int i = 0; i < (int) next.symbol().length(); i++)
+ if (next.symbol()[i] != '_')
+ *p++ = next.symbol()[i];
+ *p++ = '\0';
+
+ n = -1 * atol(nstr);
+ return new TermNumber(n);
+ }
+ else if (next.type() == Token::FloatLiteral) {
+ if (dump_tokens)
+ Print("%s", next.symbol().data());
+
+ char nstr[256], *p = nstr;
+ for (int i = 0; i < (int) next.symbol().length(); i++)
+ if (next.symbol()[i] != '_')
+ *p++ = next.symbol()[i];
+ *p++ = '\0';
+
+ d = -1.0 * atof(nstr);
+ return new TermNumber(d);
+ }
+ else {
+ lexer->PutBack();
+ return error("(Parse) illegal token '-': number expected", next);
+ }
+ }
+ break;
+
+ default:
+ lexer->PutBack();
+ return 0;
+ }
+
+ case Token::LParen: return ParseArray();
+
+ case Token::LBrace: return ParseStruct();
+
+ case Token::CharLiteral:
+ return error("(Parse) illegal token ", t);
+
+ default:
+ lexer->PutBack();
+ return 0;
+ }
+}
+
+TermArray*
+Parser::ParseArray()
+{
+ TermList* elems = ParseTermList(0);
+ Token end = lexer->Get();
+
+ if (end.type() != Token::RParen)
+ return (TermArray*) error("(Parse) ')' missing in array-decl", end);
+
+ return new TermArray(elems);
+}
+
+TermStruct*
+Parser::ParseStruct()
+{
+ TermList* elems = ParseTermList(1);
+ Token end = lexer->Get();
+
+ if (end.type() != Token::RBrace)
+ return (TermStruct*) error("(Parse) '}' missing in struct", end);
+
+ return new TermStruct(elems);
+}
+
+TermList*
+Parser::ParseTermList(int for_struct)
+{
+ TermList* tlist = new TermList;
+
+ Term* term = ParseTerm();
+ while (term) {
+ if (for_struct && !term->isDef()) {
+ return (TermList*) error("(Parse) non-definition term in struct");
+ }
+ else if (!for_struct && term->isDef()) {
+ return (TermList*) error("(Parse) illegal definition in array");
+ }
+
+ tlist->append(term);
+ Token t = lexer->Get();
+
+ /*** OLD WAY: COMMA SEPARATORS REQUIRED ***
+ if (t.type() != Token::Comma) {
+ lexer->PutBack();
+ term = 0;
+ }
+ else
+ term = ParseTerm();
+ /*******************************************/
+
+ // NEW WAY: COMMA SEPARATORS OPTIONAL:
+ if (t.type() != Token::Comma) {
+ lexer->PutBack();
+ }
+
+ term = ParseTerm();
+ }
+
+ return tlist;
+}
diff --git a/DefinitionEx/src/Term.cpp b/DefinitionEx/src/Term.cpp
new file mode 100644
index 0000000..acfdcb8
--- /dev/null
+++ b/DefinitionEx/src/Term.cpp
@@ -0,0 +1,119 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Implementation of the Term class
+*/
+
+#include <Term.h>
+
+#include <Utils.h>
+
+// +-------------------------------------------------------------------+
+
+Term*
+error(const char* s1, const char* s2)
+{
+ Print("ERROR: ");
+ if (s1) Print(s1);
+ if (s2) Print(s2);
+ Print("\n\n");
+ return 0;
+}
+
+// +-------------------------------------------------------------------+
+
+void TermBool::print(int level) { if (level > 0) Print(val? "true" : "false"); else Print("..."); }
+void TermNumber::print(int level){ if (level > 0) Print("%g", val); else Print("..."); }
+void TermText::print(int level) { if (level > 0) Print("\"%s\"", val.data()); else Print("..."); }
+
+// +-------------------------------------------------------------------+
+
+TermArray::TermArray(TermList* elist)
+{
+ elems = elist;
+}
+
+TermArray::~TermArray()
+{
+ if (elems) elems->destroy();
+ delete elems;
+}
+
+void
+TermArray::print(int level)
+{
+ if (level > 1) {
+ Print("(");
+
+ if (elems) {
+ for (int i = 0; i < elems->size(); i++) {
+ elems->at(i)->print(level-1);
+ if (i < elems->size() -1)
+ Print(", ");
+ }
+ }
+
+ Print(") ");
+ }
+ else Print("(...) ");
+}
+
+// +-------------------------------------------------------------------+
+
+TermStruct::TermStruct(TermList* elist)
+{
+ elems = elist;
+}
+
+TermStruct::~TermStruct()
+{
+ if (elems) elems->destroy();
+ delete elems;
+}
+
+void
+TermStruct::print(int level)
+{
+ if (level > 1) {
+ Print("{");
+
+ if (elems) {
+ for (int i = 0; i < elems->size(); i++) {
+ elems->at(i)->print(level-1);
+ if (i < elems->size() -1)
+ Print(", ");
+ }
+ }
+
+ Print("} ");
+ }
+ else Print("{...} ");
+}
+
+// +-------------------------------------------------------------------+
+
+TermDef::~TermDef()
+{
+ delete mname;
+ delete mval;
+}
+
+void
+TermDef::print(int level)
+{
+ if (level >= 0) {
+ mname->print(level);
+ Print(": ");
+ mval->print(level-1);
+ }
+ else Print("...");
+}
+
+// +-------------------------------------------------------------------+
diff --git a/DefinitionEx/src/Token.cpp b/DefinitionEx/src/Token.cpp
new file mode 100644
index 0000000..3f516de
--- /dev/null
+++ b/DefinitionEx/src/Token.cpp
@@ -0,0 +1,549 @@
+/* Starshatter: The Open Source Project
+ Copyright (c) 2021-2024, Starshatter: The Open Source Project Contributors
+ Copyright (c) 2011-2012, Starshatter OpenSource Distribution Contributors
+ Copyright (c) 1997-2006, Destroyer Studios LLC.
+
+ AUTHOR: John DiCamillo
+
+
+ OVERVIEW
+ ========
+ Scanner class implementation
+*/
+
+#include <Token.h>
+
+#include <ctype.h>
+#include <string.h>
+
+#include <cstddef>
+#include <utility>
+
+#include <starshatter/foundation/reader.h>
+#include <Text.h>
+
+// +-------------------------------------------------------------------+
+
+bool Token::hidecom = true;
+char Token::combeg[3] = "//";
+char Token::comend[3] = "\n";
+char Token::altbeg[3] = "/*";
+char Token::altend[3] = "*/";
+Dictionary<int> Token::keymap;
+
+// +-------------------------------------------------------------------+
+
+Token::Token()
+ : mType(Undefined), mKey(0), mLine(0), mColumn(0)
+{
+ mLength = 0;
+ mSymbol[0] = '\0';
+}
+
+Token::Token(const Token& rhs)
+ : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn)
+{
+ mLength = rhs.mLength;
+ if (mLength < 8) {
+ strcpy(mSymbol, rhs.mSymbol);
+ }
+ else {
+ mFullSymbol = new char[mLength + 1];
+ strcpy(mFullSymbol, rhs.mFullSymbol);
+ }
+}
+
+Token::Token(int t)
+ : mType(t), mKey(0), mLine(0), mColumn(0)
+{
+ mLength = 0;
+ mSymbol[0] = '\0';
+}
+
+Token::Token(const char* s, int t, int k, int l, int c)
+ : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+ mLength = strlen(s);
+ if (mLength < 8) {
+ strcpy(mSymbol, s);
+ }
+ else {
+ mFullSymbol = new char[mLength + 1];
+ strcpy(mFullSymbol, s);
+ }
+}
+
+Token::Token(const Text& s, int t, int k, int l, int c)
+ : mType(t), mKey(k), mLine(l), mColumn(c)
+{
+ mLength = s.length();
+ if (mLength < 8) {
+ strcpy(mSymbol, s.data());
+ }
+ else {
+ mFullSymbol = new char[mLength + 1];
+ strcpy(mFullSymbol, s.data());
+ }
+}
+
+Token::~Token()
+{
+ if (mLength >= 8)
+ delete [] mFullSymbol;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::close()
+{
+ keymap.clear();
+}
+
+// +-------------------------------------------------------------------+
+
+Token&
+Token::operator = (const Token& rhs)
+{
+ if (mLength >= 8)
+ delete [] mFullSymbol;
+
+ mLength = rhs.mLength;
+ if (mLength < 8) {
+ strcpy(mSymbol, rhs.mSymbol);
+ }
+ else {
+ mFullSymbol = new char[mLength + 1];
+ strcpy(mFullSymbol, rhs.mFullSymbol);
+ }
+
+ mType = rhs.mType;
+ mKey = rhs.mKey;
+ mLine = rhs.mLine;
+ mColumn = rhs.mColumn;
+
+ return *this;
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Token::match(const Token& ref) const
+{
+ if (mType == ref.mType) { // if types match
+ if (ref.mLength == 0) // if no symbol to match
+ return true; // match!
+
+ else if (mLength == ref.mLength) { // else if symbols match
+ if (mLength < 8) {
+ if (!strcmp(mSymbol, ref.mSymbol))
+ return true; // match!
+ }
+ else {
+ if (!strcmp(mFullSymbol, ref.mFullSymbol))
+ return true; // match!
+ }
+ }
+ }
+
+ return false;
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::symbol() const
+{
+ if (mLength < 8)
+ return Text(mSymbol);
+ else
+ return Text(mFullSymbol);
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::addKey(const Text& k, int v)
+{
+ keymap.insert(k, v);
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::addKeys(Dictionary<int>& keys)
+{
+ DictionaryIter<int> iter = keys;
+ while (++iter)
+ keymap.insert(iter.key(), iter.value());
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Token::findKey(const Text& k, int& v)
+{
+ if (keymap.contains(k)) {
+ v = keymap.find(k, 0);
+ return true;
+ }
+ else
+ return false;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::comments(const Text& begin, const Text& end)
+{
+ combeg[0] = begin(0);
+ if (begin.length() > 1) combeg[1] = begin(1);
+ else combeg[1] = '\0';
+
+ comend[0] = end(0);
+ if (end.length() > 1) comend[1] = end(1);
+ else comend[1] = '\0';
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Token::altComments(const Text& begin, const Text& end)
+{
+ altbeg[0] = begin(0);
+ if (begin.length() > 1) altbeg[1] = begin(1);
+ else altbeg[1] = '\0';
+
+ altend[0] = end(0);
+ if (end.length() > 1) altend[1] = end(1);
+ else altend[1] = '\0';
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::typestr() const
+{
+ Text t = "Unknown";
+ switch (type()) {
+ case Undefined: t = "Undefined"; break;
+ case Keyword: t = "Keyword"; break;
+ case AlphaIdent: t = "AlphaIdent"; break;
+ case SymbolicIdent: t = "SymbolicIdent"; break;
+ case Comment: t = "Comment"; break;
+ case IntLiteral: t = "IntLiteral"; break;
+ case FloatLiteral: t = "FloatLiteral"; break;
+ case StringLiteral: t = "StringLiteral"; break;
+ case CharLiteral: t = "CharLiteral"; break;
+ case Dot: t = "Dot"; break;
+ case Comma: t = "Comma"; break;
+ case Colon: t = "Colon"; break;
+ case Semicolon: t = "Semicolon"; break;
+ case LParen: t = "LParen"; break;
+ case RParen: t = "RParen"; break;
+ case LBracket: t = "LBracket"; break;
+ case RBracket: t = "RBracket"; break;
+ case LBrace: t = "LBrace"; break;
+ case RBrace: t = "RBrace"; break;
+ case EOT: t = "EOT"; break;
+ case LastTokenType: t = "LastTokenType"; break;
+ }
+
+ return t;
+}
+
+// +-------------------------------------------------------------------+
+
+Text
+Token::describe(const Text& tok)
+{
+ Text d;
+
+ switch (tok(0)) {
+ case '.' : d = "Token::Dot"; break;
+ case ',' : d = "Token::Comma"; break;
+ case ';' : d = "Token::Semicolon"; break;
+ case '(' : d = "Token::LParen"; break;
+ case ')' : d = "Token::RParen"; break;
+ case '[' : d = "Token::LBracket"; break;
+ case ']' : d = "Token::RBracket"; break;
+ case '{' : d = "Token::LBrace"; break;
+ case '}' : d = "Token::RBrace"; break;
+ default : break;
+ }
+
+ if (d.length() == 0) {
+ if (isalpha(tok(0)))
+ d = "\"" + tok + "\", Token::AlphaIdent";
+ else if (isdigit(tok(0))) {
+ if (tok.contains("."))
+ d = "\"" + tok + "\", Token::FloatLiteral";
+ else
+ d = "\"" + tok + "\", Token::IntLiteral";
+ }
+ else
+ d = "\"" + tok + "\", Token::SymbolicIdent";
+ }
+
+ return d;
+}
+
+// +-------------------------------------------------------------------+
+
+Scanner::Scanner() :
+ reader {},
+ str {nullptr},
+ index {0},
+ old_index {0},
+ length {0},
+ line {0},
+ old_line {0},
+ lineStart {0}
+{
+}
+
+
+Scanner::Scanner(starshatter::foundation::Reader r) :
+ reader {std::move(r)},
+ str {nullptr},
+ index {0},
+ old_index {0},
+ length {0},
+ line {0},
+ old_line {0},
+ lineStart {0}
+{
+}
+
+
+Scanner::Scanner(const Text& s) :
+ reader {},
+ str {nullptr},
+ index {0},
+ old_index {0},
+ length {static_cast<std::size_t>(s.length())},
+ line {0},
+ old_line {0},
+ lineStart {0}
+{
+ str = new char [s.length() + 1];
+ strcpy(str, s.data());
+}
+
+
+Scanner::~Scanner()
+{
+ if (str)
+ delete [] str;
+}
+
+// +-------------------------------------------------------------------+
+
+void
+Scanner::Load(const Text& s)
+{
+ delete [] str;
+ str = new char [s.length() + 1];
+ strcpy(str, s.data());
+
+ index = 0;
+ old_index = 0;
+ best = Token();
+ length = s.length();
+ line = 0;
+ old_line = 0;
+ lineStart = 0;
+}
+
+// +-------------------------------------------------------------------+
+
+Token
+Scanner::Get(Need need)
+{
+ int type = Token::EOT;
+ old_index = index;
+ old_line = line;
+
+ eos = str + length;
+ p = str + index;
+
+ if (p >= eos) {
+ if (need == Demand && reader.valid()) {
+ Load(reader.more());
+ if (length > 0)
+ return Get(need);
+ }
+ return Token("", type, 0, line, 0);
+ }
+
+ while (isspace(*p) && p < eos) { // skip initial white space
+ if (*p == '\n') {
+ line++;
+ lineStart = p - str;
+ }
+ p++;
+ }
+
+ if (p >= eos) {
+ if (need == Demand && reader.valid()) {
+ Load(reader.more());
+ if (length > 0)
+ return Get(need);
+ }
+ return Token("", type, 0, line, 0);
+ }
+
+ Token result;
+ size_t start = p - str;
+
+ if (*p == '"' || *p == '\'') { // special case for quoted tokens
+
+ if (*p == '"') type = Token::StringLiteral;
+ else type = Token::CharLiteral;
+
+ char match = *p;
+ while (++p < eos) {
+ if (*p == match) { // find matching quote
+ if (*(p-1) != '\\') { // if not escaped
+ p++; // token includes matching quote
+ break;
+ }
+ }
+ }
+ }
+
+ // generic delimited comments
+ else if (*p == Token::comBeg(0) &&
+ (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) {
+ type = Token::Comment;
+ while (++p < eos) {
+ if (*p == Token::comEnd(0) &&
+ (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) {
+ p++; if (Token::comEnd(1)) p++;
+ break;
+ }
+ }
+ }
+
+ // alternate form delimited comments
+ else if (*p == Token::altBeg(0) &&
+ (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) {
+ type = Token::Comment;
+ while (++p < eos) {
+ if (*p == Token::altEnd(0) &&
+ (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) {
+ p++; if (Token::altEnd(1)) p++;
+ break;
+ }
+ }
+ }
+
+ else if (*p == '.') type = Token::Dot;
+ else if (*p == ',') type = Token::Comma;
+ else if (*p == ';') type = Token::Semicolon;
+ else if (*p == '(') type = Token::LParen;
+ else if (*p == ')') type = Token::RParen;
+ else if (*p == '[') type = Token::LBracket;
+ else if (*p == ']') type = Token::RBracket;
+ else if (*p == '{') type = Token::LBrace;
+ else if (*p == '}') type = Token::RBrace;
+
+ // use lexical sub-parser for ints and floats
+ else if (isdigit(*p))
+ type = GetNumeric();
+
+ else if (IsSymbolic(*p)) {
+ type = Token::SymbolicIdent;
+ while (IsSymbolic(*p)) p++;
+ }
+
+ else {
+ type = Token::AlphaIdent;
+ while (IsAlpha(*p)) p++;
+ }
+
+ size_t extent = (p - str) - start;
+
+ if (extent < 1) extent = 1; // always get at least one character
+
+ index = start + extent; // advance the cursor
+ int col = start - lineStart;
+ if (line == 0) col++;
+
+ char* buf = new char [extent + 1];
+ strncpy(buf, str + start, extent);
+ buf[extent] = '\0';
+
+ if (type == Token::Comment && Token::hidecom) {
+ delete [] buf;
+ if (Token::comEnd(0) == '\n') {
+ line++;
+ lineStart = p - str;
+ }
+ return Get(need);
+ }
+
+ if (type == Token::AlphaIdent || // check for keyword
+ type == Token::SymbolicIdent) {
+ int val;
+ if (Token::findKey(Text(buf), val))
+ result = Token(buf, Token::Keyword, val, line+1, col);
+ }
+
+ if (result.mType != Token::Keyword)
+ result = Token(buf, type, 0, line+1, col);
+
+ if (line+1 > (size_t) best.mLine ||
+ (line+1 == (size_t) best.mLine && col > best.mColumn))
+ best = result;
+
+ delete [] buf;
+ return result;
+}
+
+// +-------------------------------------------------------------------+
+
+int
+Scanner::GetNumeric()
+{
+ int type = Token::IntLiteral; // assume int
+
+ if (*p == '0' && *(p+1) == 'x') { // check for hex:
+ p += 2;
+ while (isxdigit(*p)) p++;
+ return type;
+ }
+
+ while (isdigit(*p) || *p == '_') p++; // whole number part
+
+ if (*p == '.') { p++; // optional fract part
+ type = Token::FloatLiteral; // implies float
+
+ while (isdigit(*p) || *p == '_') p++; // fractional part
+ }
+
+ if (*p == 'E' || *p == 'e') { p++; // optional exponent
+ if (*p == '+' || *p == '-') p++; // which may be signed
+ while (isdigit(*p)) p++;
+
+ type = Token::FloatLiteral; // implies float
+ }
+
+ return type;
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Scanner::IsAlpha(char c)
+{
+ return (isalpha(c) || isdigit(c) || (c == '_'))?true:false;
+}
+
+// +-------------------------------------------------------------------+
+
+bool
+Scanner::IsSymbolic(char c)
+{
+ const char* s = "+-*/\\<=>~!@#$%^&|:";
+ return strchr(s, c)?true:false;
+}