Error-tolerant mode for tokenizer

Vadim Lopatin 2015-01-21 09:30:59 +03:00
parent 6036905692
commit c06627937d
2 changed files with 81 additions and 32 deletions
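With error tolerance enabled, the tokenizer stops throwing ParserException on malformed input: parserError() instead returns the shared InvalidToken (TokenType.INVALID) positioned at the start of the broken token, so a consumer such as the syntax highlighter can keep scanning and paint the span as an error. A minimal consumer sketch follows; the scanTolerant helper name and the nextToken()/TokenType.EOF pulling loop are assumptions taken from the highlighter's usage and are not part of this diff.

    // Sketch only: drive the tokenizer in error tolerant mode and report invalid tokens.
    // nextToken() and TokenType.EOF are assumed and not shown in this diff.
    void scanTolerant(Tokenizer tokenizer) {
        tokenizer.errorTolerant = true;               // flag added by this commit
        for (;;) {
            Token t = tokenizer.nextToken();          // assumed API
            if (t is null || t.type == TokenType.EOF)
                break;
            if (t.type == TokenType.INVALID)          // returned instead of a thrown ParserException
                Log.e("invalid token at line ", t.line);
            // shared token instances are reused by the tokenizer; clone() any token you keep
        }
    }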

View File

@@ -20,7 +20,8 @@ enum TokenType : ubyte {
     INTEGER,
     FLOAT,
     KEYWORD,
-    OP
+    OP,
+    INVALID
 }
 
 // table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
@@ -245,16 +246,18 @@ const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
     0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
 ];
 
-// returns true if character is A..Z, a..z, _ or universal alpha
-public bool isUniversalAlpha(dchar ch) pure nothrow {
+/// returns true if character is A..Z, a..z, _ or universal alpha
+bool isUniversalAlpha(dchar ch) pure nothrow {
     return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31))));
 }
 
-public bool isIdentStartChar(dchar ch) pure nothrow {
+/// character can present at the beginning of identifier
+bool isIdentStartChar(dchar ch) pure nothrow {
     return isUniversalAlpha(ch);
 }
 
-public bool isIdentMiddleChar(dchar ch) pure nothrow {
+/// character can present in middle of identifier
+bool isIdentMiddleChar(dchar ch) pure nothrow {
     return (ch >= '0' && ch <='9') || isUniversalAlpha(ch);
 }
@@ -1025,7 +1028,6 @@ class KeywordToken : Token {
 }
-
 
 // do we need comment text?
 class CommentToken : Token {
     protected dchar[] _text;
     protected bool _isDocumentationComment;
@@ -1048,13 +1050,34 @@ class CommentToken : Token {
         _text = text;
     }
     override public Token clone() {
-        return new CommentToken(_file, _line, _pos, _text);
+        return new CommentToken(_file, _line, _pos, _text.dup);
     }
     public override @property string toString() {
         return "Comment:" ~ to!string(_text);
     }
 }
 
+/// Invalid token holder - for error tolerant parsing
+class InvalidToken : Token {
+    protected dchar[] _text;
+    @property override dchar[] text() { return _text; }
+    @property void text(dchar[] text) { _text = text; }
+    this() {
+        super(TokenType.INVALID);
+    }
+    this(SourceFile file, uint line, uint pos, dchar[] text) {
+        super(TokenType.INVALID, file, line, pos);
+        _text = text;
+    }
+    override Token clone() {
+        return new InvalidToken(_file, _line, _pos, _text.dup);
+    }
+    override @property string toString() {
+        return "Invalid:" ~ to!string(_text);
+    }
+}
+
 alias tokenizer_ident_t = uint;
 alias tokenizer_ident_name_t = dchar[];
@@ -1291,6 +1314,7 @@ class Tokenizer
     protected KeywordToken _sharedKeywordToken = new KeywordToken();
     protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
     protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
+    protected InvalidToken _sharedInvalidToken = new InvalidToken();
     protected StringAppender _stringLiteralAppender;
     protected StringAppender _commentAppender;
     protected StringAppender _identAppender;
@@ -1321,14 +1345,16 @@ class Tokenizer
     void init(SourceLines lineStream) {
         _lineStream = lineStream;
-        _sharedWhiteSpaceToken.setFile(_lineStream.file);
-        _sharedCommentToken.setFile(_lineStream.file);
-        _sharedStringLiteralToken.setFile(_lineStream.file);
-        _sharedIdentToken.setFile(_lineStream.file);
-        _sharedOpToken.setFile(_lineStream.file);
-        _sharedKeywordToken.setFile(_lineStream.file);
-        _sharedIntegerToken.setFile(_lineStream.file);
-        _sharedRealToken.setFile(_lineStream.file);
+        SourceFile file = _lineStream.file;
+        _sharedWhiteSpaceToken.setFile(file);
+        _sharedCommentToken.setFile(file);
+        _sharedStringLiteralToken.setFile(file);
+        _sharedIdentToken.setFile(file);
+        _sharedOpToken.setFile(file);
+        _sharedKeywordToken.setFile(file);
+        _sharedIntegerToken.setFile(file);
+        _sharedRealToken.setFile(file);
+        _sharedInvalidToken.setFile(file);
         buildTime = Clock.currTime();
         _line = lineStream.line;
         _pos = 0;
@@ -1572,7 +1598,7 @@ class Tokenizer
         _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
         ch = _pos < _len ? _lineText[_pos] : 0;
         if (isIdentMiddleChar(ch))
-            parserError("Unexpected character after number");
+            return parserError("Unexpected character after number", _sharedIntegerToken);
         return _sharedIntegerToken;
     }
@@ -1580,7 +1606,7 @@
         _sharedIntegerToken.setPos(_line, _pos - 1);
         _pos++;
         if (_pos >= _len)
-            parserError("Unexpected end of line in binary number");
+            return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
         int digits = 0;
         ulong number = 0;
         int i = _pos;
@@ -1593,7 +1619,7 @@
         }
         _pos = i;
         if (digits > 64)
-            parserError("number is too big");
+            return parserError("number is too big", _sharedIntegerToken);
         _sharedIntegerToken.setValue(number);
         return processIntegerSuffix();
     }
@@ -1603,7 +1629,7 @@
         _sharedRealToken.setPos(_line, _pos - 1);
         _pos++;
         if (_pos >= _len)
-            parserError("Unexpected end of line in hex number");
+            return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
         int digits = 0;
         ulong number = 0;
         int i = _pos;
@@ -1625,7 +1651,7 @@
         }
         _pos = i;
         if (digits > 16)
-            parserError("number is too big to fit 64 bits");
+            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
         _sharedIntegerToken.setValue(number);
         return processIntegerSuffix();
     }
@@ -1633,7 +1659,7 @@
     protected Token processOctNumber() {
         _sharedIntegerToken.setPos(_line, _pos - 1);
         if (_pos >= _len)
-            parserError("Unexpected end of line in octal number");
+            return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
         int digits = 0;
         ulong number = 0;
         int i = _pos;
@@ -1659,7 +1685,7 @@
         }
         _pos = i;
         if (overflow)
-            parserError("number is too big to fit 64 bits");
+            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
         _sharedIntegerToken.setValue(number);
         return processIntegerSuffix();
     }
@@ -1682,7 +1708,7 @@
             sign = -1;
         }
         if (_pos >= _len)
-            parserError("Invalid exponent");
+            return parserError("Invalid exponent", _sharedRealToken);
         ulong digits = 0;
         ulong number = 0;
         int i = _pos;
@@ -1707,7 +1733,7 @@
             digits++;
         }
         if (digits == 0)
-            parserError("Invalid exponent");
+            return parserError("Invalid exponent", _sharedRealToken);
         _pos = i;
         value *= pow(10., cast(long)number * sign);
         return processDecFloatSuffix(value);
@@ -1757,7 +1783,7 @@
         _sharedIntegerToken.setPos(_line, _pos);
         _sharedRealToken.setPos(_line, _pos);
         if (_pos >= _len)
-            parserError("Unexpected end of line in number");
+            return parserError("Unexpected end of line in number", _sharedIntegerToken);
         int digits = 0;
         ulong number = 0;
         int i = _pos;
@@ -1783,7 +1809,7 @@
         }
         _pos = i;
         if (overflow)
-            parserError("number is too big to fit 64 bits");
+            return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
         _sharedIntegerToken.setValue(number);
         dchar next = _pos < _len ? _lineText[_pos] : 0;
         if (next == 0)
@@ -1795,7 +1821,16 @@ class Tokenizer
         return processIntegerSuffix();
     }
 
-    protected void parserError(string msg) {
+    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
+    protected Token parserError(string msg, Token incompleteToken, dchar currentChar = 0) {
+        return parserError(msg, incompleteToken.line, incompleteToken.pos, currentChar);
+    }
+    /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
+    protected Token parserError(string msg, int startLine, int startPos, dchar currentChar = 0) {
+        if (_errorTolerant) {
+            _sharedInvalidToken.setPos(startLine, startPos);
+            return _sharedInvalidToken;
+        }
         throw new ParserException(msg, _lineStream.file.filename, _line, _pos);
     }
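Because the tolerant path returns the same _sharedInvalidToken instance for every error (and the other _shared*Token instances are reused in the same way), code that keeps tokens past the next scan step has to clone() them; that is also why CommentToken.clone() now dups the text buffer and InvalidToken.clone() does the same. A hedged sketch of that pattern, with collectTokens and the nextToken()/EOF loop as illustrative assumptions:

    // Sketch only: keep copies of tokens, including INVALID ones, for later inspection.
    Token[] collectTokens(Tokenizer tokenizer) {
        Token[] result;
        for (;;) {
            Token t = tokenizer.nextToken();          // assumed API
            if (t is null || t.type == TokenType.EOF)
                break;
            result ~= t.clone();                      // clone() dups _text, so the copy is safe from reuse
        }
        return result;
    }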
@@ -2264,11 +2299,11 @@ class Tokenizer
             if (ch == 'c' || ch == 'w' || ch == 'd')
                 t = ch;
             else if (isIdentMiddleChar(ch))
-                parserError("Unexpected character after string literal");
+                return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
         }
         if (t != 0) {
             if (type != 0 && t != type)
-                parserError("Cannot concatenate strings of different type");
+                return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
             type = t;
         }
         if (!wysiwyg) {
@@ -2324,7 +2359,7 @@ class Tokenizer
             case Keyword.VERSION_: // Compiler version as an integer, such as 2001
                 return makeSpecialTokenString(VERSION, pos);
             default:
-                parserError("Unexpected token");
+                parserError("Unknown special token", _line, pos);
         }
         return null;
     }

View File

@@ -34,6 +34,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
         _file = new SourceFile(filename);
         _lines = new ArraySourceLines();
         _tokenizer = new Tokenizer(_lines);
+        _tokenizer.errorTolerant = true;
     }
 
     TokenPropString[] _props;
@@ -88,6 +89,18 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
                     case TokenType.STRING:
                         category = TokenCategory.String;
                         break;
+                    case TokenType.CHARACTER:
+                        category = TokenCategory.Character;
+                        break;
+                    case TokenType.INTEGER:
+                        category = TokenCategory.Integer;
+                        break;
+                    case TokenType.FLOAT:
+                        category = TokenCategory.FLoat;
+                        break;
+                    case TokenType.INVALID:
+                        category = TokenCategory.Error;
+                        break;
                     default:
                         category = 0;
                         break;
@@ -97,7 +110,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
             }
         } catch (Exception e) {
-            log.e("exception while trying to parse D source", e);
+            Log.e("exception while trying to parse D source", e);
         }
         _lines.close();
         _props = null;
@@ -113,7 +126,8 @@ class DSourceEdit : SourceEdit {
         backgroundColor = 0xFFFFFF;
         setTokenHightlightColor(TokenCategory.Comment, 0x008000); // green
         setTokenHightlightColor(TokenCategory.Keyword, 0x0000FF); // blue
-        setTokenHightlightColor(TokenCategory.String, 0xA31515); // red
+        setTokenHightlightColor(TokenCategory.String, 0xA31515); // brown
+        setTokenHightlightColor(TokenCategory.Error, 0xFF0000); // red
         setTokenHightlightColor(TokenCategory.Comment_Documentation, 0x206000);
         //setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // no colors
     }