From c06627937d6b847849596767bf50fbaa1a525327 Mon Sep 17 00:00:00 2001 From: Vadim Lopatin Date: Wed, 21 Jan 2015 09:30:59 +0300 Subject: [PATCH] error tolerant mode for tokenizer --- src/ddc/lexer/tokenizer.d | 95 ++++++++++++++++++++++++++------------- src/dlangide/ui/frame.d | 18 +++++++- 2 files changed, 81 insertions(+), 32 deletions(-) diff --git a/src/ddc/lexer/tokenizer.d b/src/ddc/lexer/tokenizer.d index 2768b7c..4f0c7f0 100644 --- a/src/ddc/lexer/tokenizer.d +++ b/src/ddc/lexer/tokenizer.d @@ -20,7 +20,8 @@ enum TokenType : ubyte { INTEGER, FLOAT, KEYWORD, - OP + OP, + INVALID } // table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _ @@ -245,16 +246,18 @@ const uint[1728] UNIVERSAL_ALPHA_FLAGS = [ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff ]; -// returns true if character is A..Z, a..z, _ or universal alpha -public bool isUniversalAlpha(dchar ch) pure nothrow { +/// returns true if character is A..Z, a..z, _ or universal alpha +bool isUniversalAlpha(dchar ch) pure nothrow { return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31)))); } -public bool isIdentStartChar(dchar ch) pure nothrow { +/// character can present at the beginning of identifier +bool isIdentStartChar(dchar ch) pure nothrow { return isUniversalAlpha(ch); } -public bool isIdentMiddleChar(dchar ch) pure nothrow { +/// character can present in middle of identifier +bool isIdentMiddleChar(dchar ch) pure nothrow { return (ch >= '0' && ch <='9') || isUniversalAlpha(ch); } @@ -1025,7 +1028,6 @@ class KeywordToken : Token { } // do we need comment text? - class CommentToken : Token { protected dchar[] _text; protected bool _isDocumentationComment; @@ -1048,13 +1050,34 @@ class CommentToken : Token { _text = text; } override public Token clone() { - return new CommentToken(_file, _line, _pos, _text); + return new CommentToken(_file, _line, _pos, _text.dup); } public override @property string toString() { return "Comment:" ~ to!string(_text); } } +/// Invalid token holder - for error tolerant parsing +class InvalidToken : Token { + protected dchar[] _text; + + @property override dchar[] text() { return _text; } + @property void text(dchar[] text) { _text = text; } + this() { + super(TokenType.INVALID); + } + this(SourceFile file, uint line, uint pos, dchar[] text) { + super(TokenType.INVALID, file, line, pos); + _text = text; + } + override Token clone() { + return new InvalidToken(_file, _line, _pos, _text.dup); + } + override @property string toString() { + return "Invalid:" ~ to!string(_text); + } +} + alias tokenizer_ident_t = uint; alias tokenizer_ident_name_t = dchar[]; @@ -1291,6 +1314,7 @@ class Tokenizer protected KeywordToken _sharedKeywordToken = new KeywordToken(); protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken(); protected RealLiteralToken _sharedRealToken = new RealLiteralToken(); + protected InvalidToken _sharedInvalidToken = new InvalidToken(); protected StringAppender _stringLiteralAppender; protected StringAppender _commentAppender; protected StringAppender _identAppender; @@ -1321,14 +1345,16 @@ class Tokenizer void init(SourceLines lineStream) { _lineStream = lineStream; - _sharedWhiteSpaceToken.setFile(_lineStream.file); - _sharedCommentToken.setFile(_lineStream.file); - _sharedStringLiteralToken.setFile(_lineStream.file); - _sharedIdentToken.setFile(_lineStream.file); - _sharedOpToken.setFile(_lineStream.file); - _sharedKeywordToken.setFile(_lineStream.file); - _sharedIntegerToken.setFile(_lineStream.file); - _sharedRealToken.setFile(_lineStream.file); + SourceFile file = _lineStream.file; + _sharedWhiteSpaceToken.setFile(file); + _sharedCommentToken.setFile(file); + _sharedStringLiteralToken.setFile(file); + _sharedIdentToken.setFile(file); + _sharedOpToken.setFile(file); + _sharedKeywordToken.setFile(file); + _sharedIntegerToken.setFile(file); + _sharedRealToken.setFile(file); + _sharedInvalidToken.setFile(file); buildTime = Clock.currTime(); _line = lineStream.line; _pos = 0; @@ -1572,7 +1598,7 @@ class Tokenizer _sharedIntegerToken.setFlags(unsignedFlag, longFlag); ch = _pos < _len ? _lineText[_pos] : 0; if (isIdentMiddleChar(ch)) - parserError("Unexpected character after number"); + return parserError("Unexpected character after number", _sharedIntegerToken); return _sharedIntegerToken; } @@ -1580,7 +1606,7 @@ class Tokenizer _sharedIntegerToken.setPos(_line, _pos - 1); _pos++; if (_pos >= _len) - parserError("Unexpected end of line in binary number"); + return parserError("Unexpected end of line in binary number", _sharedIntegerToken); int digits = 0; ulong number = 0; int i = _pos; @@ -1593,7 +1619,7 @@ class Tokenizer } _pos = i; if (digits > 64) - parserError("number is too big"); + return parserError("number is too big", _sharedIntegerToken); _sharedIntegerToken.setValue(number); return processIntegerSuffix(); } @@ -1603,7 +1629,7 @@ class Tokenizer _sharedRealToken.setPos(_line, _pos - 1); _pos++; if (_pos >= _len) - parserError("Unexpected end of line in hex number"); + return parserError("Unexpected end of line in hex number", _sharedIntegerToken); int digits = 0; ulong number = 0; int i = _pos; @@ -1625,7 +1651,7 @@ class Tokenizer } _pos = i; if (digits > 16) - parserError("number is too big to fit 64 bits"); + return parserError("number is too big to fit 64 bits", _sharedIntegerToken); _sharedIntegerToken.setValue(number); return processIntegerSuffix(); } @@ -1633,7 +1659,7 @@ class Tokenizer protected Token processOctNumber() { _sharedIntegerToken.setPos(_line, _pos - 1); if (_pos >= _len) - parserError("Unexpected end of line in octal number"); + return parserError("Unexpected end of line in octal number", _sharedIntegerToken); int digits = 0; ulong number = 0; int i = _pos; @@ -1659,7 +1685,7 @@ class Tokenizer } _pos = i; if (overflow) - parserError("number is too big to fit 64 bits"); + return parserError("number is too big to fit 64 bits", _sharedIntegerToken); _sharedIntegerToken.setValue(number); return processIntegerSuffix(); } @@ -1682,7 +1708,7 @@ class Tokenizer sign = -1; } if (_pos >= _len) - parserError("Invalid exponent"); + return parserError("Invalid exponent", _sharedRealToken); ulong digits = 0; ulong number = 0; int i = _pos; @@ -1707,7 +1733,7 @@ class Tokenizer digits++; } if (digits == 0) - parserError("Invalid exponent"); + return parserError("Invalid exponent", _sharedRealToken); _pos = i; value *= pow(10., cast(long)number * sign); return processDecFloatSuffix(value); @@ -1757,7 +1783,7 @@ class Tokenizer _sharedIntegerToken.setPos(_line, _pos); _sharedRealToken.setPos(_line, _pos); if (_pos >= _len) - parserError("Unexpected end of line in number"); + return parserError("Unexpected end of line in number", _sharedIntegerToken); int digits = 0; ulong number = 0; int i = _pos; @@ -1783,7 +1809,7 @@ class Tokenizer } _pos = i; if (overflow) - parserError("number is too big to fit 64 bits"); + return parserError("number is too big to fit 64 bits", _sharedIntegerToken); _sharedIntegerToken.setValue(number); dchar next = _pos < _len ? _lineText[_pos] : 0; if (next == 0) @@ -1795,7 +1821,16 @@ class Tokenizer return processIntegerSuffix(); } - protected void parserError(string msg) { + /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag + protected Token parserError(string msg, Token incompleteToken, dchar currentChar = 0) { + return parserError(msg, incompleteToken.line, incompleteToken.pos, currentChar); + } + /// Either return InvalidToken or throw parser exception depending on current errorTolerant flag + protected Token parserError(string msg, int startLine, int startPos, dchar currentChar = 0) { + if (_errorTolerant) { + _sharedInvalidToken.setPos(startLine, startPos); + return _sharedInvalidToken; + } throw new ParserException(msg, _lineStream.file.filename, _line, _pos); } @@ -2264,11 +2299,11 @@ class Tokenizer if (ch == 'c' || ch == 'w' || ch == 'd') t = ch; else if (isIdentMiddleChar(ch)) - parserError("Unexpected character after string literal"); + return parserError("Unexpected character after string literal", _sharedStringLiteralToken); } if (t != 0) { if (type != 0 && t != type) - parserError("Cannot concatenate strings of different type"); + return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken); type = t; } if (!wysiwyg) { @@ -2324,7 +2359,7 @@ class Tokenizer case Keyword.VERSION_: // Compiler version as an integer, such as 2001 return makeSpecialTokenString(VERSION, pos); default: - parserError("Unexpected token"); + parserError("Unknown special token", _line, pos); } return null; } diff --git a/src/dlangide/ui/frame.d b/src/dlangide/ui/frame.d index 01e6c27..28b6ca6 100644 --- a/src/dlangide/ui/frame.d +++ b/src/dlangide/ui/frame.d @@ -34,6 +34,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter { _file = new SourceFile(filename); _lines = new ArraySourceLines(); _tokenizer = new Tokenizer(_lines); + _tokenizer.errorTolerant = true; } TokenPropString[] _props; @@ -88,6 +89,18 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter { case TokenType.STRING: category = TokenCategory.String; break; + case TokenType.CHARACTER: + category = TokenCategory.Character; + break; + case TokenType.INTEGER: + category = TokenCategory.Integer; + break; + case TokenType.FLOAT: + category = TokenCategory.FLoat; + break; + case TokenType.INVALID: + category = TokenCategory.Error; + break; default: category = 0; break; @@ -97,7 +110,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter { } } catch (Exception e) { - log.e("exception while trying to parse D source", e); + Log.e("exception while trying to parse D source", e); } _lines.close(); _props = null; @@ -113,7 +126,8 @@ class DSourceEdit : SourceEdit { backgroundColor = 0xFFFFFF; setTokenHightlightColor(TokenCategory.Comment, 0x008000); // green setTokenHightlightColor(TokenCategory.Keyword, 0x0000FF); // blue - setTokenHightlightColor(TokenCategory.String, 0xA31515); // red + setTokenHightlightColor(TokenCategory.String, 0xA31515); // brown + setTokenHightlightColor(TokenCategory.Error, 0xFF0000); // red setTokenHightlightColor(TokenCategory.Comment_Documentation, 0x206000); //setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // no colors }