Starshatter_Open
Open source Starshatter engine
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Token.cpp
Go to the documentation of this file.
1 /* Project STARS
2  John DiCamillo Software Consulting
3  Copyright © 1997-2000. All Rights Reserved.
4 
5  SUBSYSTEM: Stars
6  FILE: token.cpp
7  AUTHOR: John DiCamillo
8 
9 
10  OVERVIEW
11  ========
12  Scanner class implementation
13 */
14 
15 #include "MemDebug.h"
16 #include "Token.h"
17 #include "Reader.h"
18 #include "Text.h"
19 
20 #include <ctype.h>
21 
22 // +-------------------------------------------------------------------+
23 
// Static scanner configuration shared by all tokens.
// hidecom: tested after a Comment token has been scanned; while it is true
// the comment text is discarded and scanning continues with the next token.
24 bool Token::hidecom = true;
// Primary comment delimiters. Each array holds up to two delimiter
// characters plus a terminator; the defaults are a double slash through the
// end of the line. Token::comments() overwrites the first two characters.
25 char Token::combeg[3] = "//";
26 char Token::comend[3] = "\n";
// Alternate comment delimiters (defaults: C-style block comment markers).
// Token::altComments() overwrites the first two characters.
27 char Token::altbeg[3] = "/*";
28 char Token::altend[3] = "*/";
30 
31 // +-------------------------------------------------------------------+
32 
// Initializer list and body of a constructor whose signature line is absent
// from this listing (the listing numbers jump from 32 to 34).
// Builds a token of type Undefined with key 0, line 0, column 0 and an
// empty inline symbol.
34  : mType(Undefined), mKey(0), mLine(0), mColumn(0)
35 {
36  mLength = 0;
37  mSymbol[0] = '\0';
38 }
39 
40 Token::Token(const Token& rhs)
41  : mType(rhs.mType), mKey(rhs.mKey), mLine(rhs.mLine), mColumn(rhs.mColumn)
42 {
43  mLength = rhs.mLength;
44  if (mLength < 8) {
45  strcpy_s(mSymbol, rhs.mSymbol);
46  }
47  else {
48  mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1];
49  strcpy(mFullSymbol, rhs.mFullSymbol);
50  }
51 }
52 
// Initializer list and body of a constructor whose signature line is absent
// from this listing (the listing numbers jump from 52 to 54).
// Builds a token of type t with key 0, line 0, column 0 and an empty inline
// symbol.
54  : mType(t), mKey(0), mLine(0), mColumn(0)
55 {
56  mLength = 0;
57  mSymbol[0] = '\0';
58 }
59 
60 Token::Token(const char* s, int t, int k, int l, int c)
61  : mType(t), mKey(k), mLine(l), mColumn(c)
62 {
63  mLength = strlen(s);
64  if (mLength < 8) {
65  strcpy_s(mSymbol, s);
66  }
67  else {
68  mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1];
69  strcpy(mFullSymbol, s);
70  }
71 }
72 
73 Token::Token(const Text& s, int t, int k, int l, int c)
74  : mType(t), mKey(k), mLine(l), mColumn(c)
75 {
76  mLength = s.length();
77  if (mLength < 8) {
78  strcpy_s(mSymbol, s.data());
79  }
80  else {
81  mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1];
82  strcpy(mFullSymbol, s.data());
83  }
84 }
85 
// Body of a function whose signature line is absent from this listing
// (presumably the Token destructor -- confirm against Token.h).
// Only symbols of length 8 or more were given a heap buffer (mFullSymbol) by
// the constructors in this file; shorter symbols live in mSymbol and need no
// cleanup.
87 {
88  if (mLength >= 8)
89  delete [] mFullSymbol;
90 }
91 
92 // +-------------------------------------------------------------------+
93 
// The line naming this function is absent from this listing (the listing
// numbers jump from 94 to 96).
// Empties keymap, the keyword table that Token::addKey() fills and
// Token::findKey() consults.
94 void
96 {
97  keymap.clear();
98 }
99 
100 // +-------------------------------------------------------------------+
101 
102 Token&
104 {
105  if (mLength >= 8)
106  delete [] mFullSymbol;
107 
108  mLength = rhs.mLength;
109  if (mLength < 8) {
110  strcpy_s(mSymbol, rhs.mSymbol);
111  }
112  else {
113  mFullSymbol = new(__FILE__, __LINE__) char[mLength + 1];
114  strcpy(mFullSymbol, rhs.mFullSymbol);
115  }
116 
117  mType = rhs.mType;
118  mKey = rhs.mKey;
119  mLine = rhs.mLine;
120  mColumn = rhs.mColumn;
121 
122  return *this;
123 }
124 
125 // +-------------------------------------------------------------------+
126 
127 bool
128 Token::match(const Token& ref) const
129 {
130  if (mType == ref.mType) { // if types match
131  if (ref.mLength == 0) // if no symbol to match
132  return true; // match!
133 
134  else if (mLength == ref.mLength) { // else if symbols match
135  if (mLength < 8) {
136  if (!strcmp(mSymbol, ref.mSymbol))
137  return true; // match!
138  }
139  else {
140  if (!strcmp(mFullSymbol, ref.mFullSymbol))
141  return true; // match!
142  }
143  }
144  }
145 
146  return false;
147 }
148 
149 // +-------------------------------------------------------------------+
150 
// The line naming this function is absent from this listing (the listing
// numbers jump from 151 to 153).
// Returns the token's symbol as a Text, reading it from the inline buffer
// (mSymbol) when the length is below 8 and from the heap buffer
// (mFullSymbol) otherwise.
151 Text
153 {
154  if (mLength < 8)
155  return Text(mSymbol);
156  else
157  return Text(mFullSymbol);
158 }
159 
160 // +-------------------------------------------------------------------+
161 
162 void
163 Token::addKey(const Text& k, int v)
164 {
165  keymap.insert(k, v);
166 }
167 
168 // +-------------------------------------------------------------------+
169 
// The line naming this function is absent from this listing (the listing
// numbers jump from 170 to 172).
// Copies every key/value pair reachable through a DictionaryIter over `keys`
// into keymap.
// NOTE(review): the loop relies on ++iter both advancing to the next entry
// and converting to false at the end -- confirm against DictionaryIter.
170 void
172 {
173  DictionaryIter<int> iter = keys;
174  while (++iter)
175  keymap.insert(iter.key(), iter.value());
176 }
177 
178 // +-------------------------------------------------------------------+
179 
180 bool
181 Token::findKey(const Text& k, int& v)
182 {
183  if (keymap.contains(k)) {
184  v = keymap.find(k, 0);
185  return true;
186  }
187  else
188  return false;
189 }
190 
191 // +-------------------------------------------------------------------+
192 
193 void
194 Token::comments(const Text& begin, const Text& end)
195 {
196  combeg[0] = begin(0);
197  if (begin.length() > 1) combeg[1] = begin(1);
198  else combeg[1] = '\0';
199 
200  comend[0] = end(0);
201  if (end.length() > 1) comend[1] = end(1);
202  else comend[1] = '\0';
203 }
204 
205 // +-------------------------------------------------------------------+
206 
207 void
208 Token::altComments(const Text& begin, const Text& end)
209 {
210  altbeg[0] = begin(0);
211  if (begin.length() > 1) altbeg[1] = begin(1);
212  else altbeg[1] = '\0';
213 
214  altend[0] = end(0);
215  if (end.length() > 1) altend[1] = end(1);
216  else altend[1] = '\0';
217 }
218 
219 // +-------------------------------------------------------------------+
220 
// The line naming this function is absent from this listing (the listing
// numbers jump from 221 to 223).
// Returns the name of this token's type() as a Text. The result stays
// "Unknown" when type() matches none of the enumerators listed below.
221 Text
223 {
224  Text t = "Unknown";
225  switch (type()) {
226  case Undefined: t = "Undefined"; break;
227  case Keyword: t = "Keyword"; break;
228  case AlphaIdent: t = "AlphaIdent"; break;
229  case SymbolicIdent: t = "SymbolicIdent"; break;
230  case Comment: t = "Comment"; break;
231  case IntLiteral: t = "IntLiteral"; break;
232  case FloatLiteral: t = "FloatLiteral"; break;
233  case StringLiteral: t = "StringLiteral"; break;
234  case CharLiteral: t = "CharLiteral"; break;
235  case Dot: t = "Dot"; break;
236  case Comma: t = "Comma"; break;
237  case Colon: t = "Colon"; break;
238  case Semicolon: t = "Semicolon"; break;
239  case LParen: t = "LParen"; break;
240  case RParen: t = "RParen"; break;
241  case LBracket: t = "LBracket"; break;
242  case RBracket: t = "RBracket"; break;
243  case LBrace: t = "LBrace"; break;
244  case RBrace: t = "RBrace"; break;
245  case EOT: t = "EOT"; break;
246  case LastTokenType: t = "LastTokenType"; break;
247  }
248 
249  return t;
250 }
251 
252 // +-------------------------------------------------------------------+
253 
// The line naming this function is absent from this listing (the listing
// numbers jump from 254 to 256).
// Builds a textual description of the token text `tok`, classified by its
// first character only:
//   - one of the punctuation characters below -> the matching "Token::X" name
//   - a letter -> the quoted text followed by ", Token::AlphaIdent"
//   - a digit  -> the quoted text followed by ", Token::FloatLiteral" when
//                 the text contains a '.', else ", Token::IntLiteral"
//   - anything else -> the quoted text followed by ", Token::SymbolicIdent"
// NOTE(review): ':' has no case in the switch although a Colon token type
// exists in this file, so a colon falls through to SymbolicIdent -- confirm
// that this is intended.
254 Text
256 {
257  Text d;
258 
259  switch (tok(0)) {
260  case '.' : d = "Token::Dot"; break;
261  case ',' : d = "Token::Comma"; break;
262  case ';' : d = "Token::Semicolon"; break;
263  case '(' : d = "Token::LParen"; break;
264  case ')' : d = "Token::RParen"; break;
265  case '[' : d = "Token::LBracket"; break;
266  case ']' : d = "Token::RBracket"; break;
267  case '{' : d = "Token::LBrace"; break;
268  case '}' : d = "Token::RBrace"; break;
269  default : break;
270  }
271 
// No punctuation case matched: classify by the first character instead.
272  if (d.length() == 0) {
273  if (isalpha(tok(0)))
274  d = "\"" + tok + "\", Token::AlphaIdent";
275  else if (isdigit(tok(0))) {
276  if (tok.contains("."))
277  d = "\"" + tok + "\", Token::FloatLiteral";
278  else
279  d = "\"" + tok + "\", Token::IntLiteral";
280  }
281  else
282  d = "\"" + tok + "\", Token::SymbolicIdent";
283  }
284 
285  return d;
286 }
287 
288 // +-------------------------------------------------------------------+
289 
// Initializer list and body of a constructor whose signature line is absent
// from this listing (the listing numbers jump from 289 to 291).
// Stores the reader `r` and starts with no text buffer (str is null) and
// all cursor, length and line counters at zero.
291  : reader(r), str(0), index(0), old_index(0),
292  length(0), line(0), old_line(0), lineStart(0)
293 { }
294 
296  : index(rhs.index), old_index(rhs.old_index), length(rhs.length),
297  reader(rhs.reader),
298  line(rhs.line), old_line(0), lineStart(rhs.lineStart)
299 {
300  str = new(__FILE__, __LINE__) char [strlen(rhs.str) + 1];
301  strcpy(str, rhs.str);
302 }
303 
// Initializer list and body of a constructor whose signature line is absent
// from this listing (the listing numbers jump from 303 to 305).
// Builds a scanner with no reader over a private copy of the text `s`:
// length is s.length(), and the cursor and line counters start at zero.
305  : reader(0), index(0), old_index(0), length(s.length()), line(0),
306  old_line(0), lineStart(0)
307 {
308  str = new(__FILE__, __LINE__) char [s.length() + 1];
309  strcpy(str, s.data());
310 }
311 
// Body of a function whose signature line is absent from this listing
// (presumably the Scanner destructor -- confirm against Token.h).
// Releases the text buffer allocated with new[] by the constructors and
// loaders in this file.
313 {
314  delete [] str;
315 }
316 
317 // +-------------------------------------------------------------------+
318 
319 Scanner&
321 {
322  delete [] str;
323  str = new(__FILE__, __LINE__) char [strlen(rhs.str) + 1];
324  strcpy(str, rhs.str);
325 
326  index = rhs.index;
327  old_index = rhs.old_index;
328  length = rhs.length;
329  line = rhs.line;
330  old_line = rhs.old_line;
331  lineStart = rhs.lineStart;
332 
333  return *this;
334 }
335 
336 // +-------------------------------------------------------------------+
337 
// The line naming this function is absent from this listing (the listing
// numbers jump from 338 to 340).
// Replaces the scanner's text with a private copy of `s` and rewinds all
// scanning state: cursor and saved cursor to 0, line counters to 0, the
// "best" token to a default-constructed Token, and length to s.length().
338 void
340 {
341  delete [] str;
342  str = new(__FILE__, __LINE__) char [s.length() + 1];
343  strcpy(str, s.data());
344 
345  index = 0;
346  old_index = 0;
347  best = Token();
348  length = s.length();
349  line = 0;
350  old_line = 0;
351  lineStart = 0;
352 }
353 
354 // +-------------------------------------------------------------------+
355 
// The line naming this function is absent from this listing (the listing
// numbers jump from 356 to 358); the body calls itself as Get(need).
//
// Scans and returns the next token from the text buffer.
//   - Saves the current cursor and line in old_index / old_line.
//   - Skips leading white space, counting newlines.
//   - When the buffer is exhausted and need == Demand with a reader present,
//     loads more text from the reader and retries; otherwise returns an EOT
//     token with an empty symbol.
//   - Recognizes quoted literals, both comment forms, single-character
//     punctuation, numeric literals, symbolic identifiers and alphanumeric
//     identifiers, in that order.
//   - Identifiers found in the keyword table are returned as Keyword tokens
//     carrying the table value as their key.
//   - Comment tokens are dropped (and scanning continues) while
//     Token::hidecom is true.
// Returned tokens carry a 1-based line number (line+1) and a column derived
// from the distance to the most recent line start.
356 Token
358 {
359  int type = Token::EOT;
360  old_index = index;
361  old_line = line;
362 
363  eos = str + length;
364  p = str + index;
365 
// Nothing left in the buffer: try to refill from the reader, else report EOT.
366  if (p >= eos) {
367  if (need == Demand && reader) {
368  Load(reader->more());
369  if (length > 0)
370  return Get(need);
371  }
372  return Token("", type, 0, line, 0);
373  }
374 
// NOTE(review): *p is read before the p < eos test. This is only harmless if
// the byte at eos is the buffer's terminator -- confirm that length always
// equals strlen(str). Also, passing a plain char to isspace() is undefined
// for negative values; consider casting to unsigned char.
375  while (isspace(*p) && p < eos) { // skip initial white space
376  if (*p == '\n') {
377  line++;
378  lineStart = p - str;
379  }
380  p++;
381  }
382 
// Only white space remained: same refill-or-EOT handling as above.
383  if (p >= eos) {
384  if (need == Demand && reader) {
385  Load(reader->more());
386  if (length > 0)
387  return Get(need);
388  }
389  return Token("", type, 0, line, 0);
390  }
391 
392  Token result;
393  size_t start = p - str;
394 
// NOTE(review): only the single preceding character is checked for a
// backslash, so a closing quote that follows an escaped backslash is treated
// as escaped. Newlines inside the literal are not added to `line`.
395  if (*p == '"' || *p == '\'') { // special case for quoted tokens
396 
397  if (*p == '"') type = Token::StringLiteral;
398  else type = Token::CharLiteral;
399 
400  char match = *p;
401  while (++p < eos) {
402  if (*p == match) { // find matching quote
403  if (*(p-1) != '\\') { // if not escaped
404  p++; // token includes matching quote
405  break;
406  }
407  }
408  }
409  }
410 
// A delimiter whose second character is '\0' is a one-character delimiter.
// An unterminated comment runs to the end of the buffer.
411  // generic delimited comments
412  else if (*p == Token::comBeg(0) &&
413  (!Token::comBeg(1) || *(p+1) == Token::comBeg(1))) {
414  type = Token::Comment;
415  while (++p < eos) {
416  if (*p == Token::comEnd(0) &&
417  (!Token::comEnd(1) || *(p+1) == Token::comEnd(1))) {
418  p++; if (Token::comEnd(1)) p++;
419  break;
420  }
421  }
422  }
423 
424  // alternate form delimited comments
425  else if (*p == Token::altBeg(0) &&
426  (!Token::altBeg(1) || *(p+1) == Token::altBeg(1))) {
427  type = Token::Comment;
428  while (++p < eos) {
429  if (*p == Token::altEnd(0) &&
430  (!Token::altEnd(1) || *(p+1) == Token::altEnd(1))) {
431  p++; if (Token::altEnd(1)) p++;
432  break;
433  }
434  }
435  }
436 
// Single-character punctuation: p is not advanced here; the minimum extent
// of one character applied further down consumes it.
437  else if (*p == '.') type = Token::Dot;
438  else if (*p == ',') type = Token::Comma;
439  else if (*p == ';') type = Token::Semicolon;
440  else if (*p == '(') type = Token::LParen;
441  else if (*p == ')') type = Token::RParen;
442  else if (*p == '[') type = Token::LBracket;
443  else if (*p == ']') type = Token::RBracket;
444  else if (*p == '{') type = Token::LBrace;
445  else if (*p == '}') type = Token::RBrace;
446 
447  // use lexical sub-parser for ints and floats
448  else if (isdigit(*p))
449  type = GetNumeric();
450 
// NOTE(review): neither identifier loop tests p against eos; both rely on
// the character classifiers returning false at the buffer terminator.
451  else if (IsSymbolic(*p)) {
452  type = Token::SymbolicIdent;
453  while (IsSymbolic(*p)) p++;
454  }
455 
456  else {
457  type = Token::AlphaIdent;
458  while (IsAlpha(*p)) p++;
459  }
460 
461  size_t extent = (p - str) - start;
462 
463  if (extent < 1) extent = 1; // always get at least one character
464 
465  index = start + extent; // advance the cursor
// Column is the offset from the last recorded line start; on the first line
// (no newline seen yet) it is shifted by one.
466  int col = start - lineStart;
467  if (line == 0) col++;
468 
// Temporary NUL-terminated copy of the token text.
469  char* buf = new(__FILE__, __LINE__) char [extent + 1];
470  strncpy(buf, str + start, extent);
471  buf[extent] = '\0';
472 
// Hidden comments are discarded and the next token is returned instead.
// NOTE(review): the line bump keys on the primary end delimiter being '\n'
// but runs for any Comment token, including alternate-form comments; newlines
// inside a comment body are not counted. Confirm that this is intended.
473  if (type == Token::Comment && Token::hidecom) {
474  delete [] buf;
475  if (Token::comEnd(0) == '\n') {
476  line++;
477  lineStart = p - str;
478  }
479  return Get(need);
480  }
481 
482  if (type == Token::AlphaIdent || // check for keyword
483  type == Token::SymbolicIdent) {
484  int val;
485  if (Token::findKey(Text(buf), val))
486  result = Token(buf, Token::Keyword, val, line+1, col);
487  }
488 
// Not a keyword: build the token with the type determined above.
489  if (result.mType != Token::Keyword)
490  result = Token(buf, type, 0, line+1, col);
491 
// Remember the token that lies furthest into the text (by line, then column).
492  if (line+1 > (size_t) best.mLine ||
493  (line+1 == (size_t) best.mLine && col > best.mColumn))
494  best = result;
495 
496  delete [] buf;
497  return result;
498 }
499 
500 // +-------------------------------------------------------------------+
501 
// The line naming this function is absent from this listing (the listing
// numbers jump from 502 to 504).
// Advances the scan cursor p over a numeric literal and returns its token
// type: Token::IntLiteral unless a fractional part or an exponent is seen,
// in which case Token::FloatLiteral.
//   - "0x" (lowercase x only) introduces a hexadecimal integer.
//   - '_' is accepted between digits of the whole and fractional parts.
// NOTE(review): no comparison against eos is made here; the loops stop only
// because the character tests fail at the buffer terminator.
502 int
504 {
505  int type = Token::IntLiteral; // assume int
506 
507  if (*p == '0' && *(p+1) == 'x') { // check for hex:
508  p += 2;
509  while (isxdigit(*p)) p++;
510  return type;
511  }
512 
513  while (isdigit(*p) || *p == '_') p++; // whole number part
514 
515  if (*p == '.') { p++; // optional fract part
516  type = Token::FloatLiteral; // implies float
517 
518  while (isdigit(*p) || *p == '_') p++; // fractional part
519  }
520 
521  if (*p == 'E' || *p == 'e') { p++; // optional exponent
522  if (*p == '+' || *p == '-') p++; // which may be signed
523  while (isdigit(*p)) p++;
524 
525  type = Token::FloatLiteral; // implies float
526  }
527 
528  return type;
529 }
530 
531 // +-------------------------------------------------------------------+
532 
// The line naming this function is absent from this listing (the listing
// numbers jump from 533 to 535).
// Returns true when the character at the scan cursor p is a letter, a digit
// or an underscore.
// NOTE(review): the test reads *p rather than a parameter. The visible call
// site passes *p, so the result is the same there, but the signature is not
// visible -- confirm whether an argument is being ignored.
533 bool
535 {
536  return (isalpha(*p) || isdigit(*p) || (*p == '_'))?true:false;
537 }
538 
539 // +-------------------------------------------------------------------+
540 
541 bool
543 {
544  const char* s = "+-*/\\<=>~!@#$%^&|:";
545  return strchr(s, c)?true:false;
546 }