mirror of https://github.com/buggins/dlangide.git
error tolerant mode for tokenizer
This commit is contained in:
parent
6036905692
commit
c06627937d
|
|
@ -20,7 +20,8 @@ enum TokenType : ubyte {
|
||||||
INTEGER,
|
INTEGER,
|
||||||
FLOAT,
|
FLOAT,
|
||||||
KEYWORD,
|
KEYWORD,
|
||||||
OP
|
OP,
|
||||||
|
INVALID
|
||||||
}
|
}
|
||||||
|
|
||||||
// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
|
// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
|
||||||
|
|
@ -245,16 +246,18 @@ const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
|
||||||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
|
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
|
||||||
];
|
];
|
||||||
|
|
||||||
// returns true if character is A..Z, a..z, _ or universal alpha
|
/// returns true if character is A..Z, a..z, _ or universal alpha
|
||||||
public bool isUniversalAlpha(dchar ch) pure nothrow {
|
bool isUniversalAlpha(dchar ch) pure nothrow {
|
||||||
return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31))));
|
return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31))));
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool isIdentStartChar(dchar ch) pure nothrow {
|
/// character can present at the beginning of identifier
|
||||||
|
bool isIdentStartChar(dchar ch) pure nothrow {
|
||||||
return isUniversalAlpha(ch);
|
return isUniversalAlpha(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
public bool isIdentMiddleChar(dchar ch) pure nothrow {
|
/// character can present in middle of identifier
|
||||||
|
bool isIdentMiddleChar(dchar ch) pure nothrow {
|
||||||
return (ch >= '0' && ch <='9') || isUniversalAlpha(ch);
|
return (ch >= '0' && ch <='9') || isUniversalAlpha(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1025,7 +1028,6 @@ class KeywordToken : Token {
|
||||||
}
|
}
|
||||||
|
|
||||||
// do we need comment text?
|
// do we need comment text?
|
||||||
|
|
||||||
class CommentToken : Token {
|
class CommentToken : Token {
|
||||||
protected dchar[] _text;
|
protected dchar[] _text;
|
||||||
protected bool _isDocumentationComment;
|
protected bool _isDocumentationComment;
|
||||||
|
|
@ -1048,13 +1050,34 @@ class CommentToken : Token {
|
||||||
_text = text;
|
_text = text;
|
||||||
}
|
}
|
||||||
override public Token clone() {
|
override public Token clone() {
|
||||||
return new CommentToken(_file, _line, _pos, _text);
|
return new CommentToken(_file, _line, _pos, _text.dup);
|
||||||
}
|
}
|
||||||
public override @property string toString() {
|
public override @property string toString() {
|
||||||
return "Comment:" ~ to!string(_text);
|
return "Comment:" ~ to!string(_text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Invalid token holder - for error tolerant parsing
|
||||||
|
class InvalidToken : Token {
|
||||||
|
protected dchar[] _text;
|
||||||
|
|
||||||
|
@property override dchar[] text() { return _text; }
|
||||||
|
@property void text(dchar[] text) { _text = text; }
|
||||||
|
this() {
|
||||||
|
super(TokenType.INVALID);
|
||||||
|
}
|
||||||
|
this(SourceFile file, uint line, uint pos, dchar[] text) {
|
||||||
|
super(TokenType.INVALID, file, line, pos);
|
||||||
|
_text = text;
|
||||||
|
}
|
||||||
|
override Token clone() {
|
||||||
|
return new InvalidToken(_file, _line, _pos, _text.dup);
|
||||||
|
}
|
||||||
|
override @property string toString() {
|
||||||
|
return "Invalid:" ~ to!string(_text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
alias tokenizer_ident_t = uint;
|
alias tokenizer_ident_t = uint;
|
||||||
alias tokenizer_ident_name_t = dchar[];
|
alias tokenizer_ident_name_t = dchar[];
|
||||||
|
|
||||||
|
|
@ -1291,6 +1314,7 @@ class Tokenizer
|
||||||
protected KeywordToken _sharedKeywordToken = new KeywordToken();
|
protected KeywordToken _sharedKeywordToken = new KeywordToken();
|
||||||
protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
|
protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
|
||||||
protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
|
protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
|
||||||
|
protected InvalidToken _sharedInvalidToken = new InvalidToken();
|
||||||
protected StringAppender _stringLiteralAppender;
|
protected StringAppender _stringLiteralAppender;
|
||||||
protected StringAppender _commentAppender;
|
protected StringAppender _commentAppender;
|
||||||
protected StringAppender _identAppender;
|
protected StringAppender _identAppender;
|
||||||
|
|
@ -1321,14 +1345,16 @@ class Tokenizer
|
||||||
|
|
||||||
void init(SourceLines lineStream) {
|
void init(SourceLines lineStream) {
|
||||||
_lineStream = lineStream;
|
_lineStream = lineStream;
|
||||||
_sharedWhiteSpaceToken.setFile(_lineStream.file);
|
SourceFile file = _lineStream.file;
|
||||||
_sharedCommentToken.setFile(_lineStream.file);
|
_sharedWhiteSpaceToken.setFile(file);
|
||||||
_sharedStringLiteralToken.setFile(_lineStream.file);
|
_sharedCommentToken.setFile(file);
|
||||||
_sharedIdentToken.setFile(_lineStream.file);
|
_sharedStringLiteralToken.setFile(file);
|
||||||
_sharedOpToken.setFile(_lineStream.file);
|
_sharedIdentToken.setFile(file);
|
||||||
_sharedKeywordToken.setFile(_lineStream.file);
|
_sharedOpToken.setFile(file);
|
||||||
_sharedIntegerToken.setFile(_lineStream.file);
|
_sharedKeywordToken.setFile(file);
|
||||||
_sharedRealToken.setFile(_lineStream.file);
|
_sharedIntegerToken.setFile(file);
|
||||||
|
_sharedRealToken.setFile(file);
|
||||||
|
_sharedInvalidToken.setFile(file);
|
||||||
buildTime = Clock.currTime();
|
buildTime = Clock.currTime();
|
||||||
_line = lineStream.line;
|
_line = lineStream.line;
|
||||||
_pos = 0;
|
_pos = 0;
|
||||||
|
|
@ -1572,7 +1598,7 @@ class Tokenizer
|
||||||
_sharedIntegerToken.setFlags(unsignedFlag, longFlag);
|
_sharedIntegerToken.setFlags(unsignedFlag, longFlag);
|
||||||
ch = _pos < _len ? _lineText[_pos] : 0;
|
ch = _pos < _len ? _lineText[_pos] : 0;
|
||||||
if (isIdentMiddleChar(ch))
|
if (isIdentMiddleChar(ch))
|
||||||
parserError("Unexpected character after number");
|
return parserError("Unexpected character after number", _sharedIntegerToken);
|
||||||
return _sharedIntegerToken;
|
return _sharedIntegerToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1580,7 +1606,7 @@ class Tokenizer
|
||||||
_sharedIntegerToken.setPos(_line, _pos - 1);
|
_sharedIntegerToken.setPos(_line, _pos - 1);
|
||||||
_pos++;
|
_pos++;
|
||||||
if (_pos >= _len)
|
if (_pos >= _len)
|
||||||
parserError("Unexpected end of line in binary number");
|
return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
|
||||||
int digits = 0;
|
int digits = 0;
|
||||||
ulong number = 0;
|
ulong number = 0;
|
||||||
int i = _pos;
|
int i = _pos;
|
||||||
|
|
@ -1593,7 +1619,7 @@ class Tokenizer
|
||||||
}
|
}
|
||||||
_pos = i;
|
_pos = i;
|
||||||
if (digits > 64)
|
if (digits > 64)
|
||||||
parserError("number is too big");
|
return parserError("number is too big", _sharedIntegerToken);
|
||||||
_sharedIntegerToken.setValue(number);
|
_sharedIntegerToken.setValue(number);
|
||||||
return processIntegerSuffix();
|
return processIntegerSuffix();
|
||||||
}
|
}
|
||||||
|
|
@ -1603,7 +1629,7 @@ class Tokenizer
|
||||||
_sharedRealToken.setPos(_line, _pos - 1);
|
_sharedRealToken.setPos(_line, _pos - 1);
|
||||||
_pos++;
|
_pos++;
|
||||||
if (_pos >= _len)
|
if (_pos >= _len)
|
||||||
parserError("Unexpected end of line in hex number");
|
return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
|
||||||
int digits = 0;
|
int digits = 0;
|
||||||
ulong number = 0;
|
ulong number = 0;
|
||||||
int i = _pos;
|
int i = _pos;
|
||||||
|
|
@ -1625,7 +1651,7 @@ class Tokenizer
|
||||||
}
|
}
|
||||||
_pos = i;
|
_pos = i;
|
||||||
if (digits > 16)
|
if (digits > 16)
|
||||||
parserError("number is too big to fit 64 bits");
|
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||||||
_sharedIntegerToken.setValue(number);
|
_sharedIntegerToken.setValue(number);
|
||||||
return processIntegerSuffix();
|
return processIntegerSuffix();
|
||||||
}
|
}
|
||||||
|
|
@ -1633,7 +1659,7 @@ class Tokenizer
|
||||||
protected Token processOctNumber() {
|
protected Token processOctNumber() {
|
||||||
_sharedIntegerToken.setPos(_line, _pos - 1);
|
_sharedIntegerToken.setPos(_line, _pos - 1);
|
||||||
if (_pos >= _len)
|
if (_pos >= _len)
|
||||||
parserError("Unexpected end of line in octal number");
|
return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
|
||||||
int digits = 0;
|
int digits = 0;
|
||||||
ulong number = 0;
|
ulong number = 0;
|
||||||
int i = _pos;
|
int i = _pos;
|
||||||
|
|
@ -1659,7 +1685,7 @@ class Tokenizer
|
||||||
}
|
}
|
||||||
_pos = i;
|
_pos = i;
|
||||||
if (overflow)
|
if (overflow)
|
||||||
parserError("number is too big to fit 64 bits");
|
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||||||
_sharedIntegerToken.setValue(number);
|
_sharedIntegerToken.setValue(number);
|
||||||
return processIntegerSuffix();
|
return processIntegerSuffix();
|
||||||
}
|
}
|
||||||
|
|
@ -1682,7 +1708,7 @@ class Tokenizer
|
||||||
sign = -1;
|
sign = -1;
|
||||||
}
|
}
|
||||||
if (_pos >= _len)
|
if (_pos >= _len)
|
||||||
parserError("Invalid exponent");
|
return parserError("Invalid exponent", _sharedRealToken);
|
||||||
ulong digits = 0;
|
ulong digits = 0;
|
||||||
ulong number = 0;
|
ulong number = 0;
|
||||||
int i = _pos;
|
int i = _pos;
|
||||||
|
|
@ -1707,7 +1733,7 @@ class Tokenizer
|
||||||
digits++;
|
digits++;
|
||||||
}
|
}
|
||||||
if (digits == 0)
|
if (digits == 0)
|
||||||
parserError("Invalid exponent");
|
return parserError("Invalid exponent", _sharedRealToken);
|
||||||
_pos = i;
|
_pos = i;
|
||||||
value *= pow(10., cast(long)number * sign);
|
value *= pow(10., cast(long)number * sign);
|
||||||
return processDecFloatSuffix(value);
|
return processDecFloatSuffix(value);
|
||||||
|
|
@ -1757,7 +1783,7 @@ class Tokenizer
|
||||||
_sharedIntegerToken.setPos(_line, _pos);
|
_sharedIntegerToken.setPos(_line, _pos);
|
||||||
_sharedRealToken.setPos(_line, _pos);
|
_sharedRealToken.setPos(_line, _pos);
|
||||||
if (_pos >= _len)
|
if (_pos >= _len)
|
||||||
parserError("Unexpected end of line in number");
|
return parserError("Unexpected end of line in number", _sharedIntegerToken);
|
||||||
int digits = 0;
|
int digits = 0;
|
||||||
ulong number = 0;
|
ulong number = 0;
|
||||||
int i = _pos;
|
int i = _pos;
|
||||||
|
|
@ -1783,7 +1809,7 @@ class Tokenizer
|
||||||
}
|
}
|
||||||
_pos = i;
|
_pos = i;
|
||||||
if (overflow)
|
if (overflow)
|
||||||
parserError("number is too big to fit 64 bits");
|
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||||||
_sharedIntegerToken.setValue(number);
|
_sharedIntegerToken.setValue(number);
|
||||||
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
||||||
if (next == 0)
|
if (next == 0)
|
||||||
|
|
@ -1795,7 +1821,16 @@ class Tokenizer
|
||||||
return processIntegerSuffix();
|
return processIntegerSuffix();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void parserError(string msg) {
|
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||||||
|
protected Token parserError(string msg, Token incompleteToken, dchar currentChar = 0) {
|
||||||
|
return parserError(msg, incompleteToken.line, incompleteToken.pos, currentChar);
|
||||||
|
}
|
||||||
|
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||||||
|
protected Token parserError(string msg, int startLine, int startPos, dchar currentChar = 0) {
|
||||||
|
if (_errorTolerant) {
|
||||||
|
_sharedInvalidToken.setPos(startLine, startPos);
|
||||||
|
return _sharedInvalidToken;
|
||||||
|
}
|
||||||
throw new ParserException(msg, _lineStream.file.filename, _line, _pos);
|
throw new ParserException(msg, _lineStream.file.filename, _line, _pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2264,11 +2299,11 @@ class Tokenizer
|
||||||
if (ch == 'c' || ch == 'w' || ch == 'd')
|
if (ch == 'c' || ch == 'w' || ch == 'd')
|
||||||
t = ch;
|
t = ch;
|
||||||
else if (isIdentMiddleChar(ch))
|
else if (isIdentMiddleChar(ch))
|
||||||
parserError("Unexpected character after string literal");
|
return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
|
||||||
}
|
}
|
||||||
if (t != 0) {
|
if (t != 0) {
|
||||||
if (type != 0 && t != type)
|
if (type != 0 && t != type)
|
||||||
parserError("Cannot concatenate strings of different type");
|
return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
|
||||||
type = t;
|
type = t;
|
||||||
}
|
}
|
||||||
if (!wysiwyg) {
|
if (!wysiwyg) {
|
||||||
|
|
@ -2324,7 +2359,7 @@ class Tokenizer
|
||||||
case Keyword.VERSION_: // Compiler version as an integer, such as 2001
|
case Keyword.VERSION_: // Compiler version as an integer, such as 2001
|
||||||
return makeSpecialTokenString(VERSION, pos);
|
return makeSpecialTokenString(VERSION, pos);
|
||||||
default:
|
default:
|
||||||
parserError("Unexpected token");
|
parserError("Unknown special token", _line, pos);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
|
||||||
_file = new SourceFile(filename);
|
_file = new SourceFile(filename);
|
||||||
_lines = new ArraySourceLines();
|
_lines = new ArraySourceLines();
|
||||||
_tokenizer = new Tokenizer(_lines);
|
_tokenizer = new Tokenizer(_lines);
|
||||||
|
_tokenizer.errorTolerant = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenPropString[] _props;
|
TokenPropString[] _props;
|
||||||
|
|
@ -88,6 +89,18 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
|
||||||
case TokenType.STRING:
|
case TokenType.STRING:
|
||||||
category = TokenCategory.String;
|
category = TokenCategory.String;
|
||||||
break;
|
break;
|
||||||
|
case TokenType.CHARACTER:
|
||||||
|
category = TokenCategory.Character;
|
||||||
|
break;
|
||||||
|
case TokenType.INTEGER:
|
||||||
|
category = TokenCategory.Integer;
|
||||||
|
break;
|
||||||
|
case TokenType.FLOAT:
|
||||||
|
category = TokenCategory.FLoat;
|
||||||
|
break;
|
||||||
|
case TokenType.INVALID:
|
||||||
|
category = TokenCategory.Error;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
category = 0;
|
category = 0;
|
||||||
break;
|
break;
|
||||||
|
|
@ -97,7 +110,7 @@ class SimpleDSyntaxHighlighter : SyntaxHighlighter {
|
||||||
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.e("exception while trying to parse D source", e);
|
Log.e("exception while trying to parse D source", e);
|
||||||
}
|
}
|
||||||
_lines.close();
|
_lines.close();
|
||||||
_props = null;
|
_props = null;
|
||||||
|
|
@ -113,7 +126,8 @@ class DSourceEdit : SourceEdit {
|
||||||
backgroundColor = 0xFFFFFF;
|
backgroundColor = 0xFFFFFF;
|
||||||
setTokenHightlightColor(TokenCategory.Comment, 0x008000); // green
|
setTokenHightlightColor(TokenCategory.Comment, 0x008000); // green
|
||||||
setTokenHightlightColor(TokenCategory.Keyword, 0x0000FF); // blue
|
setTokenHightlightColor(TokenCategory.Keyword, 0x0000FF); // blue
|
||||||
setTokenHightlightColor(TokenCategory.String, 0xA31515); // red
|
setTokenHightlightColor(TokenCategory.String, 0xA31515); // brown
|
||||||
|
setTokenHightlightColor(TokenCategory.Error, 0xFF0000); // red
|
||||||
setTokenHightlightColor(TokenCategory.Comment_Documentation, 0x206000);
|
setTokenHightlightColor(TokenCategory.Comment_Documentation, 0x206000);
|
||||||
//setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // no colors
|
//setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // no colors
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue