diff --git a/dlangide.visualdproj b/dlangide.visualdproj
index 5933e03..96b9e75 100644
--- a/dlangide.visualdproj
+++ b/dlangide.visualdproj
@@ -66,7 +66,7 @@
0
0
- Unicode
+ Unicode USE_SDL USE_OPENGL
0
3
0
@@ -189,6 +189,17 @@
*.obj;*.cmd;*.build;*.json;*.dep
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/ddc/lexer/Lexer.d b/src/ddc/lexer/Lexer.d
new file mode 100644
index 0000000..808070c
--- /dev/null
+++ b/src/ddc/lexer/Lexer.d
@@ -0,0 +1,288 @@
+// D grammar - according to http://dlang.org/grammar
+
+module ddc.lexer.Lexer;
+import ddc.lexer.LineStream;
+import ddc.lexer.Tokenizer;
+
+/** Lexem type constants: the tag reported by the type property of each Lexem subclass; stored as ushort. */
+enum LexemType : ushort {
+ UNKNOWN, // returned by the base class Lexem
+ // types
+ TYPE, // returned by Type
+ TYPE_CTORS, // returned by TypeCtors
+ TYPE_CTOR, // returned by TypeCtor
+ BASIC_TYPE, // returned by BasicType
+ BASIC_TYPE_X, // returned by BasicTypeX
+ BASIC_TYPE_2, // no class in this module returns it yet
+ IDENTIFIER_LIST, // returned by IdentifierList
+ IDENTIFIER, // returned by Identifier
+ TYPEOF, // returned by Typeof
+ // templates
+ TEMPLATE_INSTANCE, // returned by TemplateInstance
+ EXPRESSION, // returned by Expression
+ ALT_DECLARATOR, // returned by AltDeclarator
+}
+
+class Lexem { // common base class of all lexem classes declared in this module
+ public @property LexemType type() { return LexemType.UNKNOWN; } // subclasses override this to report their own tag
+}
+
+/**
+    Tests whether a token is one of the basic type keywords.
+
+    Accepted keywords: bool, byte, ubyte, short, ushort, int, uint, long, ulong, char, wchar, dchar,
+    float, double, real, ifloat, idouble, ireal, cfloat, cdouble, creal, void.
+
+    Returns: true only when the token is a KEYWORD token carrying one of the keywords above.
+*/
+bool isBasicTypeXToken(Token token) {
+    if (token.type == TokenType.KEYWORD) {
+        switch (token.keyword) {
+            // boolean and integral types
+            case Keyword.BOOL:
+            case Keyword.BYTE, Keyword.UBYTE, Keyword.SHORT, Keyword.USHORT:
+            case Keyword.INT, Keyword.UINT, Keyword.LONG, Keyword.ULONG:
+            // character types
+            case Keyword.CHAR, Keyword.WCHAR, Keyword.DCHAR:
+            // real, imaginary and complex floating point types
+            case Keyword.FLOAT, Keyword.DOUBLE, Keyword.REAL:
+            case Keyword.IFLOAT, Keyword.IDOUBLE, Keyword.IREAL:
+            case Keyword.CFLOAT, Keyword.CDOUBLE, Keyword.CREAL:
+            // void
+            case Keyword.VOID:
+                return true;
+            // any other keyword is not a basic type
+            default:
+                break;
+        }
+    }
+    return false;
+}
+
+/**
+ Lexem holding a single basic type keyword token, one of: bool, byte, ubyte, short, ushort, int, uint, long, ulong,
+ char, wchar, dchar, float, double, real, ifloat, idouble, ireal, cfloat, cdouble, creal, void
+*/
+class BasicTypeX : Lexem {
+ public Token _token; // the keyword token passed to the constructor
+ public override @property LexemType type() { return LexemType.BASIC_TYPE_X; }
+ public this(Token token)
+ in {
+ assert(isBasicTypeXToken(token)); // precondition: token must be a basic type keyword
+ }
+ body {
+ _token = token;
+ }
+}
+
+/** Tests whether a token is a KEYWORD token for one of the type constructors: const, immutable, inout, shared. */
+bool isTypeCtorToken(Token token) {
+    if (token.type == TokenType.KEYWORD) {
+        switch (token.keyword) {
+            case Keyword.CONST, Keyword.IMMUTABLE, Keyword.INOUT, Keyword.SHARED:
+                return true;
+            default:
+                break; // every other keyword
+        }
+    }
+    return false;
+}
+
+/**
+ Lexem holding a single type constructor keyword token, one of: const, immutable, inout, shared
+*/
+class TypeCtor : Lexem {
+ public Token _token; // the keyword token passed to the constructor
+ public override @property LexemType type() { return LexemType.TYPE_CTOR; }
+ public this(Token token)
+ in {
+ assert(isTypeCtorToken(token)); // precondition: token must be a type constructor keyword
+ }
+ body {
+ _token = token;
+ }
+}
+
+/**
+ One or several keywords: const, immutable, inout, shared (the constructor stores the first one, append() adds more)
+*/
+class TypeCtors : Lexem {
+ public TypeCtor[] _list; // one TypeCtor per keyword, in the order they were passed in
+ public override @property LexemType type() { return LexemType.TYPE_CTORS; }
+ public this(Token token)
+ in {
+ assert(isTypeCtorToken(token)); // precondition: token must be a type constructor keyword
+ }
+ body {
+ _list ~= new TypeCtor(token);
+ }
+ public void append(Token token) // adds one more type constructor keyword to the end of the list
+ in {
+ assert(isTypeCtorToken(token)); // same precondition as for the constructor
+ }
+ body {
+ _list ~= new TypeCtor(token);
+ }
+}
+
+/**
+ Identifier: lexem holding a single token of type IDENTIFIER.
+*/
+class Identifier : Lexem {
+ IdentToken _token; // the identifier token, stored as IdentToken
+ public override @property LexemType type() { return LexemType.IDENTIFIER; }
+ public this(Token identifier)
+ in {
+ assert(identifier.type == TokenType.IDENTIFIER); // precondition: token must be an identifier
+ }
+ body {
+ _token = cast(IdentToken)identifier; // NOTE(review): presumably IdentToken is a class derived from Token, so a mismatch would give null - confirm
+ }
+}
+
+/**
+ Identifier list: one element (identifier or template instance) with an optional reference to the rest of the list.
+
+ IdentifierList:
+ Identifier
+ Identifier . IdentifierList
+ TemplateInstance
+ TemplateInstance . IdentifierList
+ */
+class IdentifierList : Lexem {
+ public Identifier _identifier; // assigned only by the constructor taking a Token
+ public IdentifierList _identifierList; // rest of the list as passed to either constructor; null by default
+ public TemplateInstance _templateInstance; // assigned only by the constructor taking a TemplateInstance
+ public override @property LexemType type() { return LexemType.IDENTIFIER_LIST; }
+ public this(Token ident, IdentifierList identifierList = null)
+ in {
+ assert(ident.type == TokenType.IDENTIFIER); // precondition: first element must be an identifier token
+ }
+ body {
+ _identifier = new Identifier(ident);
+ _identifierList = identifierList;
+ }
+ public this(TemplateInstance templateInstance, IdentifierList identifierList = null)
+ in {
+ }
+ body {
+ _templateInstance = templateInstance;
+ _identifierList = identifierList;
+ }
+}
+
+/**
+ Template instance (placeholder: the class has no fields yet and its constructor is empty).
+
+ TemplateInstance:
+ Identifier TemplateArguments
+*/
+class TemplateInstance : Lexem {
+ public override @property LexemType type() { return LexemType.TEMPLATE_INSTANCE; }
+ public this()
+ in {
+ }
+ body {
+ }
+}
+
+/**
+ Basic type. The constructor is empty, so all fields keep their default values until assigned from outside.
+
+ BasicType:
+ BasicTypeX
+ . IdentifierList
+ IdentifierList
+ Typeof
+ Typeof . IdentifierList
+ TypeCtor ( Type )
+*/
+class BasicType : Lexem {
+ public BasicTypeX _basicTypeX; // presumably used for the BasicTypeX alternative - TODO confirm
+ public IdentifierList _identifierList; // presumably used for the alternatives containing IdentifierList - TODO confirm
+ public Typeof _typeof; // presumably used for the alternatives starting with Typeof - TODO confirm
+ public TypeCtor _typeCtor; // presumably the TypeCtor of the `TypeCtor ( Type )` alternative - TODO confirm
+ public Type _typeCtorType; // presumably the Type of the `TypeCtor ( Type )` alternative - TODO confirm
+ public bool _dotBeforeIdentifierList; // presumably true for the `. IdentifierList` alternative - TODO confirm
+ public override @property LexemType type() { return LexemType.BASIC_TYPE; }
+ public this()
+ in {
+ }
+ body {
+ }
+}
+
+
+
+/**
+ Typeof: lexem holding the expression of a typeof construct.
+
+ Typeof:
+ typeof ( Expression )
+ typeof ( return )
+
+ For typeof(return), _expression is null
+*/
+class Typeof : Lexem {
+ public Expression _expression; // expression passed to the constructor
+ public override @property LexemType type() { return LexemType.TYPEOF; }
+ public this(Expression expression)
+ in {
+ }
+ body {
+ _expression = expression; // stored as is, no null check (null is a valid value, see above)
+ }
+}
+
+/**
+ Type: groups type constructors, a basic type and an alternative declarator.
+ The constructor is empty, so all three fields are null until assigned from outside.
+*/
+class Type : Lexem {
+ public TypeCtors _typeCtors;
+ public BasicType _basicType;
+ public AltDeclarator _altDeclarator;
+ public override @property LexemType type() { return LexemType.TYPE; }
+ public this()
+ in {
+ }
+ body {
+ }
+}
+
+/**
+ Expression (placeholder: no fields yet, empty constructor, grammar below is not filled in).
+
+ Expression:
+*/
+class Expression : Lexem {
+ public override @property LexemType type() { return LexemType.EXPRESSION; }
+ public this()
+ in {
+ }
+ body {
+ }
+}
+
+/**
+ AltDeclarator (placeholder: no fields yet, empty constructor, grammar below is not filled in).
+
+ AltDeclarator:
+*/
+class AltDeclarator : Lexem {
+ public override @property LexemType type() { return LexemType.ALT_DECLARATOR; }
+ public this()
+ in {
+ }
+ body {
+ }
+}
+
+class Lexer // so far only remembers the line stream it was created for
+{
+ LineStream _lineStream; // source of text lines, as passed to the constructor
+ this(LineStream lineStream)
+ {
+ _lineStream = lineStream;
+ }
+}
diff --git a/src/ddc/lexer/LexerException.d b/src/ddc/lexer/LexerException.d
new file mode 100644
index 0000000..0d0aae2
--- /dev/null
+++ b/src/ddc/lexer/LexerException.d
@@ -0,0 +1,10 @@
+module ddc.lexer.LexerException;
+
+class LexerException : Exception // derives from Exception (like SourceEncodingException) so that it can be thrown and caught
+{
+    this(string msg = "Lexer error") // the default value keeps existing `new LexerException()` calls valid
+    {
+        super(msg); // pass the message on to Exception, where it is available as .msg
+    }
+}
+
diff --git a/src/ddc/lexer/LineStream.d b/src/ddc/lexer/LineStream.d
new file mode 100644
index 0000000..7f1f063
--- /dev/null
+++ b/src/ddc/lexer/LineStream.d
@@ -0,0 +1,589 @@
+module ddc.lexer.LineStream;
+
+import std.stream;
+import ddc.lexer.exceptions;
+import std.stdio;
+import std.conv;
+import ddc.lexer.textsource;
+
+class LineStream : SourceLines { // reads a byte stream through a byte buffer and hands out decoded text line by line
+ public enum EncodingType {
+ ASCII,
+ UTF8,
+ UTF16BE,
+ UTF16LE,
+ UTF32BE,
+ UTF32LE
+ };
+
+ static immutable uint LINE_POSITION_UNDEFINED = uint.max; // "not found yet" marker for positions in readLine
+ static immutable int TEXT_BUFFER_SIZE = 1024; // minimal initial length of text buffer (see reserveTextBuf)
+ static immutable int BYTE_BUFFER_SIZE = 512; // length of byte buffer allocated by create(); end of refill slice in readBytes
+ static immutable int QUARTER_BYTE_BUFFER_SIZE = BYTE_BUFFER_SIZE / 4; // readBytes refills once no more than this many bytes are left
+
+ InputStream _stream;
+ string _filename; // NOTE(review): never assigned in this class; filename() returns _file.filename
+ SourceFile _file;
+ ubyte[] _buf; // stream reading buffer
+ uint _pos; // reading position of stream buffer
+ uint _len; // number of bytes in stream buffer
+ bool _streamEof; // true if input stream is in EOF state
+ uint _line; // current line number
+
+ uint _textPos; // start of text line in text buffer
+ uint _textLen; // position of last filled char in text buffer + 1
+ dchar[] _textBuf; // text buffer
+ bool _eof; // end of file, no more lines
+
+ override @property SourceFile file() { return _file; }
+ @property string filename() { return _file.filename; }
+ override @property uint line() { return _line; }
+ @property EncodingType encoding() { return _encoding; }
+ override @property int errorCode() { return _errorCode; }
+ override @property string errorMessage() { return _errorMessage; }
+ override @property int errorLine() { return _errorLine; }
+ override @property int errorPos() { return _errorPos; }
+
+ immutable EncodingType _encoding;
+
+ int _errorCode;
+ string _errorMessage;
+ uint _errorLine;
+ uint _errorPos;
+ // stores stream and buffer state; offset is the position in buf of the first byte to decode (subclasses pass the BOM length)
+ protected this(InputStream stream, SourceFile file, EncodingType encoding, ubyte[] buf, uint offset, uint len) {
+ _file = file;
+ _stream = stream;
+ _encoding = encoding;
+ _buf = buf;
+ _len = len;
+ _pos = offset;
+ _streamEof = _stream.eof;
+ }
+
+ // refills byte buffer from stream when it runs low; returns number of bytes available in buffer starting at _pos
+ uint readBytes() {
+ uint bytesLeft = _len - _pos;
+ if (_streamEof || bytesLeft > QUARTER_BYTE_BUFFER_SIZE)
+ return bytesLeft;
+ if (_pos > 0) { // move unread bytes to the start of buffer to make room for new data
+ for (uint i = 0; i < bytesLeft; i++)
+ _buf[i] = _buf[i + _pos];
+ _len = bytesLeft;
+ _pos = 0;
+ }
+ uint bytesRead = cast(uint)_stream.read(_buf[_len .. BYTE_BUFFER_SIZE]);
+ _len += bytesRead;
+ _streamEof = _stream.eof;
+ return _len - _pos; //_buf[_pos .. _len];
+ }
+
+ // when bytes consumed from byte buffer, call this method to update position
+ void consumedBytes(uint count) {
+ _pos += count;
+ }
+
+ // reserve text buffer for specified number of characters, and return pointer to first free character in buffer
+ dchar * reserveTextBuf(uint len) {
+ // create new text buffer if necessary
+ if (_textBuf == null) {
+ if (len < TEXT_BUFFER_SIZE)
+ len = TEXT_BUFFER_SIZE;
+ _textBuf = new dchar[len];
+ return _textBuf.ptr;
+ }
+ uint spaceLeft = cast(uint)_textBuf.length - _textLen;
+ if (spaceLeft >= len)
+ return _textBuf.ptr + _textLen;
+ // move text to beginning of buffer, if necessary
+ if (_textPos > _textBuf.length / 2) {
+ uint charCount = _textLen - _textPos;
+ dchar * p = _textBuf.ptr;
+ for (uint i = 0; i < charCount; i++)
+ p[i] = p[i + _textPos];
+ _textLen = charCount;
+ _textPos = 0;
+ }
+ // resize buffer if necessary
+ if (_textLen + len > _textBuf.length) {
+ // resize buffer
+ uint newsize = cast(uint)_textBuf.length * 2;
+ if (newsize < _textLen + len)
+ newsize = _textLen + len;
+ _textBuf.length = newsize;
+ }
+ return _textBuf.ptr + _textLen;
+ }
+ // when chars are written at the pointer returned by reserveTextBuf, call this method to update text length
+ void appendedText(uint len) {
+ //writeln("appended ", len, " chars of text"); //:", _textBuf[_textLen .. _textLen + len]);
+ _textLen += len;
+ }
+ // records error code, message and position; readLine returns null once a non-zero error code is set
+ void setError(int code, string message, uint errorLine, uint errorPos) {
+ _errorCode = code;
+ _errorMessage = message;
+ _errorLine = errorLine;
+ _errorPos = errorPos;
+ }
+
+ // override to decode more bytes into the text buffer; readLine treats a return value of 0 as "no more text"
+ abstract uint decodeText();
+ // returns next line without its EOL chars, or null on error / end of input; the result is a slice of the internal text buffer
+ override public dchar[] readLine() {
+ if (_errorCode != 0) {
+ //writeln("error ", _errorCode, ": ", _errorMessage, " in line ", _errorLine);
+ return null; // error detected
+ }
+ if (_eof) {
+ //writeln("EOF found");
+ return null;
+ }
+ _line++;
+ uint p = 0; // scan position, relative to _textPos
+ uint eol = LINE_POSITION_UNDEFINED; // number of chars to consume, including EOL chars
+ uint eof = LINE_POSITION_UNDEFINED; // defined only when end of input is reached in this line
+ uint lastchar = LINE_POSITION_UNDEFINED; // length of line text, excluding EOL chars
+ do {
+ if (_errorCode != 0) {
+ //writeln("error ", _errorCode, ": ", _errorMessage, " in line ", _errorLine);
+ return null; // error detected
+ }
+ uint charsLeft = _textLen - _textPos;
+ if (p >= charsLeft) {
+ uint decodedChars = decodeText();
+ if (_errorCode != 0) {
+ return null; // error detected
+ }
+ charsLeft = _textLen - _textPos;
+ if (decodedChars == 0) { // no more text: whatever is left in buffer becomes the last line
+ eol = charsLeft;
+ eof = charsLeft;
+ lastchar = charsLeft;
+ break;
+ }
+ }
+ for (; p < charsLeft; p++) {
+ dchar ch = _textBuf[_textPos + p];
+ if (ch == 0x0D) {
+ lastchar = p;
+ if (p == charsLeft - 1) {
+ // need one more char to check if it's 0D0A or just 0D eol
+ //writeln("read one more char for 0D0A detection");
+ decodeText();
+ if (_errorCode != 0) {
+ return null; // error detected
+ }
+ charsLeft = _textLen - _textPos;
+ }
+ dchar ch2 = (p < charsLeft - 1) ? _textBuf[_textPos + p + 1] : 0;
+ if (ch2 == 0x0A)
+ eol = p + 2;
+ else
+ eol = p + 1;
+ break;
+ } else if (ch == 0x0A || ch == 0x2028 || ch == 0x2029) {
+ // single char eoln
+ lastchar = p;
+ eol = p + 1;
+ break;
+ } else if (ch == 0 || ch == 0x001A) {
+ // eof
+ //writeln("EOF char found");
+ lastchar = p;
+ eol = eof = p + 1;
+ break;
+ }
+ }
+ } while (eol == LINE_POSITION_UNDEFINED);
+ uint lineStart = _textPos;
+ uint lineEnd = _textPos + lastchar;
+ _textPos += eol; // consume text
+ if (eof != LINE_POSITION_UNDEFINED) {
+ _eof = true;
+ //writeln("Setting eof flag. lastchar=", lastchar, ", p=", p, ", lineStart=", lineStart);
+ if (lineStart >= lineEnd) {
+ //writeln("lineStart >= lineEnd -- treat as eof");
+ return null; // eof
+ }
+ }
+ // return slice with decoded line
+ return _textBuf[lineStart .. lineEnd];
+ }
+
+
+ // factory for source code held in a string: prepends UTF-8 BOM so that create() below selects Utf8LineStream
+ public static LineStream create(string code, string filename = "") {
+ uint len = cast(uint)code.length;
+ ubyte[] data = new ubyte[len + 3];
+ for (uint i = 0; i < len; i++)
+ data[i + 3] = code[i];
+ // BOM for UTF8
+ data[0] = 0xEF;
+ data[1] = 0xBB;
+ data[2] = 0xBF;
+ MemoryStream stream = new MemoryStream(data);
+ return create(stream, filename);
+ }
+
+ // factory: reads first bytes of stream and selects decoder by BOM (no BOM: AsciiLineStream); returns null if stream is not open
+ public static LineStream create(InputStream stream, string filename) {
+ ubyte[] buf = new ubyte[BYTE_BUFFER_SIZE];
+ buf[0] = buf[1] = buf[2] = buf[3] = 0;
+ if (!stream.isOpen)
+ return null;
+ uint len = cast(uint)stream.read(buf);
+ if (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) {
+ return new Utf8LineStream(stream, filename, buf, len);
+ } else if (buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0xFE && buf[3] == 0xFF) {
+ return new Utf32beLineStream(stream, filename, buf, len);
+ } else if (buf[0] == 0xFF && buf[1] == 0xFE && buf[2] == 0x00 && buf[3] == 0x00) { // must be checked before UTF-16LE: same first two bytes
+ return new Utf32leLineStream(stream, filename, buf, len);
+ } else if (buf[0] == 0xFE && buf[1] == 0xFF) {
+ return new Utf16beLineStream(stream, filename, buf, len);
+ } else if (buf[0] == 0xFF && buf[1] == 0xFE) {
+ return new Utf16leLineStream(stream, filename, buf, len);
+ } else {
+ return new AsciiLineStream(stream, filename, buf, len);
+ }
+ }
+ // set by decoders when they meet invalid input; they call invalidCharError() at the start of their next decodeText()
+ protected bool invalidCharFlag;
+ protected void invalidCharError() { // sets error code 1 for current line; position is number of chars buffered for the line + 1
+ uint pos = _textLen - _textPos + 1;
+ setError(1, "Invalid character in line " ~ to!string(_line) ~ ":" ~ to!string(pos), _line, pos);
+ }
+}
+
+
+
+class AsciiLineStream : LineStream { // decoder selected by LineStream.create when no BOM is found
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.ASCII, buf, 0, len);
+    }
+    // Copies 7-bit bytes from the byte buffer to the text buffer, stopping at the first byte with the high bit set.
+    // Such a byte is reported as an error by the next call. Returns the number of bytes that were available.
+    override uint decodeText() {
+        if (invalidCharFlag) {
+            invalidCharError();
+            return 0;
+        }
+        uint available = readBytes();
+        ubyte* src = _buf.ptr + _pos;
+        if (available == 0)
+            return 0; // nothing to decode
+        dchar* dst = reserveTextBuf(available);
+        uint copied = 0;
+        while (copied < available) {
+            ubyte code = src[copied];
+            if ((code & 0x80) != 0) {
+                invalidCharFlag = true; // invalid character: remember it, keep what was copied so far
+                break;
+            }
+            dst[copied] = code;
+            copied++;
+        }
+        consumedBytes(copied);
+        appendedText(copied);
+        return available;
+    }
+
+}
+
+class Utf8LineStream : LineStream { // decodes UTF-8 input (BOM already detected by LineStream.create) into dchar text
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.UTF8, buf, 3, len); // offset 3 skips the BOM
+    }
+    // Decodes every complete sequence found in the byte buffer and appends the code points to the text buffer.
+    // Returns number of chars appended; invalid input sets invalidCharFlag and is reported by the next call.
+    override uint decodeText() {
+        if (invalidCharFlag) {
+            invalidCharError();
+            return 0;
+        }
+        uint bytesAvailable = readBytes();
+        ubyte * bytes = _buf.ptr + _pos;
+        if (bytesAvailable == 0)
+            return 0; // nothing to decode
+        uint len = bytesAvailable;
+        uint chars = 0;
+        ubyte* b = bytes;
+        dchar* text = reserveTextBuf(len); // every char takes at least one byte, so len chars is always enough
+        uint i = 0;
+        for (; i < len; i++) {
+            uint ch = 0;
+            uint ch0 = b[i];
+            uint bleft = len - i; // bytes available for current sequence
+            uint bread = 0; // bytes used by current sequence
+            if (!(ch0 & 0x80)) {
+                // 0x00..0x7F single byte
+                ch = ch0;
+                bread = 1;
+            } else if ((ch0 & 0xE0) == 0xC0) {
+                // two bytes 110xxxxx 10xxxxxx
+                if (bleft < 2)
+                    break; // sequence is not complete yet: leave it in the byte buffer
+                uint ch1 = b[i + 1];
+                if ((ch1 & 0xC0) != 0x80) {
+                    invalidCharFlag = true;
+                    break;
+                }
+                ch = ((ch0 & 0x1F) << 6) | ((ch1 & 0x3F));
+                bread = 2;
+            } else if ((ch0 & 0xF0) == 0xE0) {
+                // three bytes 1110xxxx 10xxxxxx 10xxxxxx
+                if (bleft < 3)
+                    break;
+                uint ch1 = b[i + 1];
+                uint ch2 = b[i + 2];
+                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80) {
+                    invalidCharFlag = true;
+                    break;
+                }
+                ch = ((ch0 & 0x0F) << 12) | ((ch1 & 0x3F) << 6) | ((ch2 & 0x3F)); // continuation bytes carry 6 bits (mask was 0x1F)
+                bread = 3;
+            } else if ((ch0 & 0xF8) == 0xF0) {
+                // four bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                if (bleft < 4)
+                    break;
+                uint ch1 = b[i + 1];
+                uint ch2 = b[i + 2];
+                uint ch3 = b[i + 3];
+                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80) {
+                    invalidCharFlag = true;
+                    break;
+                }
+                ch = ((ch0 & 0x07) << 18) | ((ch1 & 0x3F) << 12) | ((ch2 & 0x3F) << 6) | ((ch3 & 0x3F));
+                bread = 4;
+            } else if ((ch0 & 0xFC) == 0xF8) {
+                // five bytes 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                if (bleft < 5)
+                    break;
+                uint ch1 = b[i + 1];
+                uint ch2 = b[i + 2];
+                uint ch3 = b[i + 3];
+                uint ch4 = b[i + 4];
+                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80 || (ch4 & 0xC0) != 0x80) {
+                    invalidCharFlag = true;
+                    break;
+                }
+                ch = ((ch0 & 0x03) << 24) | ((ch1 & 0x3F) << 18) | ((ch2 & 0x3F) << 12) | ((ch3 & 0x3F) << 6) | ((ch4 & 0x3F));
+                bread = 5;
+            } else if ((ch0 & 0xFE) == 0xFC) {
+                // six bytes 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                if (bleft < 6)
+                    break;
+                uint ch1 = b[i + 1];
+                uint ch2 = b[i + 2];
+                uint ch3 = b[i + 3];
+                uint ch4 = b[i + 4];
+                uint ch5 = b[i + 5];
+                if ((ch1 & 0xC0) != 0x80 || (ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80 || (ch4 & 0xC0) != 0x80 || (ch5 & 0xC0) != 0x80) {
+                    invalidCharFlag = true;
+                    break;
+                }
+                ch = ((ch0 & 0x01) << 30) | ((ch1 & 0x3F) << 24) | ((ch2 & 0x3F) << 18) | ((ch3 & 0x3F) << 12) | ((ch4 & 0x3F) << 6) | ((ch5 & 0x3F));
+                bread = 6; // was 5, which left the last byte of the sequence to be decoded again
+            } else {
+                // 10xxxxxx, 0xFE or 0xFF cannot start a sequence; without this branch bread stayed 0 and the loop never advanced
+                invalidCharFlag = true;
+                break;
+            }
+            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) {
+                invalidCharFlag = true;
+                break;
+            }
+            // a dchar holds a whole code point, so it is stored as is (UTF-16 surrogate halves were written here before)
+            text[chars++] = ch;
+            i += bread - 1; // the loop increment accounts for the remaining byte
+        }
+        consumedBytes(i);
+        appendedText(chars);
+        uint bleft = len - i;
+        if (_streamEof && bleft > 0)
+            invalidCharFlag = true; // incomplete character at end of stream
+        return chars; // NOTE(review): 0 is also returned when the first sequence is invalid; readLine treats 0 as end of input
+    }
+}
+
+class Utf16beLineStream : LineStream { // decodes big-endian UTF-16 input (BOM already detected) into dchar text
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.UTF16BE, buf, 2, len); // offset 2 skips the BOM
+    }
+    override uint decodeText() { // appends decoded chars to text buffer, returns their number; errors are reported by next call
+        if (invalidCharFlag) {
+            invalidCharError();
+            return 0;
+        }
+        uint len = readBytes();
+        if (len == 0) return 0; // nothing to decode
+        ubyte* b = _buf.ptr + _pos; // taken after readBytes(), which may move data inside the byte buffer
+        dchar* text = reserveTextBuf(len / 2 + 1);
+        uint chars = 0, i = 0;
+        for (; i + 1 < len; i += 2) {
+            uint ch = (cast(uint)b[i] << 8) | b[i + 1];
+            if (ch >= 0xD800 && ch < 0xE000) { // surrogate: must be a high half followed by a low half
+                if (ch >= 0xDC00) { invalidCharFlag = true; break; } // low half without preceding high half
+                if (i + 3 >= len) break; // low half is not buffered yet: leave the pair for the next call
+                uint lo = (cast(uint)b[i + 2] << 8) | b[i + 3];
+                if (lo < 0xDC00 || lo >= 0xE000) { invalidCharFlag = true; break; } // high half without low half
+                ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00); // combine both halves into one code point
+                i += 2; // the pair takes four bytes
+            }
+            text[chars++] = ch;
+        }
+        consumedBytes(i);
+        appendedText(chars);
+        if (_streamEof && i < len)
+            invalidCharFlag = true; // incomplete character at end of stream
+        return chars;
+    }
+}
+
+class Utf16leLineStream : LineStream { // decodes little-endian UTF-16 input (BOM already detected) into dchar text
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.UTF16LE, buf, 2, len); // offset 2 skips the BOM
+    }
+    override uint decodeText() { // appends decoded chars to text buffer, returns their number; errors are reported by next call
+        if (invalidCharFlag) {
+            invalidCharError();
+            return 0;
+        }
+        uint len = readBytes();
+        if (len == 0) return 0; // nothing to decode
+        ubyte* b = _buf.ptr + _pos; // taken after readBytes(), which may move data inside the byte buffer
+        dchar* text = reserveTextBuf(len / 2 + 1);
+        uint chars = 0, i = 0;
+        for (; i + 1 < len; i += 2) {
+            uint ch = (cast(uint)b[i + 1] << 8) | b[i];
+            if (ch >= 0xD800 && ch < 0xE000) { // surrogate: must be a high half followed by a low half
+                if (ch >= 0xDC00) { invalidCharFlag = true; break; } // low half without preceding high half
+                if (i + 3 >= len) break; // low half is not buffered yet: leave the pair for the next call
+                uint lo = (cast(uint)b[i + 3] << 8) | b[i + 2];
+                if (lo < 0xDC00 || lo >= 0xE000) { invalidCharFlag = true; break; } // high half without low half
+                ch = 0x10000 + ((ch - 0xD800) << 10) + (lo - 0xDC00); // combine both halves into one code point
+                i += 2; // the pair takes four bytes
+            }
+            text[chars++] = ch;
+        }
+        consumedBytes(i);
+        appendedText(chars);
+        if (_streamEof && i < len)
+            invalidCharFlag = true; // incomplete character at end of stream
+        return chars;
+    }
+}
+
+class Utf32beLineStream : LineStream { // decodes big-endian UTF-32 input (BOM already detected by LineStream.create)
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.UTF32BE, buf, 4, len); // offset 4 skips the BOM
+    }
+    override uint decodeText() { // appends all complete 4-byte units to the text buffer, returns number of chars appended
+        if (invalidCharFlag) {
+            invalidCharError(); // invalid input met by the previous call is reported now
+            return 0;
+        }
+        uint bytesAvailable = readBytes();
+        ubyte * bytes = _buf.ptr + _pos;
+        if (bytesAvailable == 0)
+            return 0; // nothing to decode
+        uint len = bytesAvailable;
+        uint chars = 0;
+        ubyte* b = bytes;
+        dchar* text = reserveTextBuf(len / 2 + 1);
+        uint i = 0;
+        for (; i + 3 < len; i += 4) { // was `i < len - 3`: unsigned, so it wrapped for len < 3 and read past the data
+            uint ch0 = b[i];
+            uint ch1 = b[i + 1];
+            uint ch2 = b[i + 2];
+            uint ch3 = b[i + 3];
+            uint ch = (ch0 << 24) | (ch1 << 16) | (ch2 << 8) | ch3;
+            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) { // surrogates and values above the Unicode range
+                invalidCharFlag = true;
+                break;
+            }
+            text[chars++] = ch;
+        }
+        consumedBytes(i);
+        appendedText(chars);
+        uint bleft = len - i;
+        if (_streamEof && bleft > 0)
+            invalidCharFlag = true; // incomplete character at end of stream
+        return chars;
+    }
+}
+
+class Utf32leLineStream : LineStream { // decodes little-endian UTF-32 input (BOM already detected by LineStream.create)
+    this(InputStream stream, string filename, ubyte[] buf, uint len) {
+        super(stream, new SourceFile(filename), EncodingType.UTF32LE, buf, 4, len); // offset 4 skips the BOM
+    }
+    override uint decodeText() { // appends all complete 4-byte units to the text buffer, returns number of chars appended
+        if (invalidCharFlag) {
+            invalidCharError(); // invalid input met by the previous call is reported now
+            return 0;
+        }
+        uint bytesAvailable = readBytes();
+        ubyte * bytes = _buf.ptr + _pos;
+        if (bytesAvailable == 0)
+            return 0; // nothing to decode
+        uint len = bytesAvailable;
+        uint chars = 0;
+        ubyte* b = bytes;
+        dchar* text = reserveTextBuf(len / 2 + 1);
+        uint i = 0;
+        for (; i + 3 < len; i += 4) { // was `i < len - 3`: unsigned, so it wrapped for len < 3 and read past the data
+            uint ch3 = b[i]; // least significant byte comes first
+            uint ch2 = b[i + 1];
+            uint ch1 = b[i + 2];
+            uint ch0 = b[i + 3];
+            uint ch = (ch0 << 24) | (ch1 << 16) | (ch2 << 8) | ch3;
+            if ((ch >= 0xd800 && ch < 0xe000) || (ch > 0x10FFFF)) { // surrogates and values above the Unicode range
+                invalidCharFlag = true;
+                break;
+            }
+            text[chars++] = ch;
+        }
+        consumedBytes(i);
+        appendedText(chars);
+        uint bleft = len - i;
+        if (_streamEof && bleft > 0)
+            invalidCharFlag = true; // incomplete character at end of stream
+        return chars;
+    }
+}
+
+
+unittest { // manual smoke test: prints every line of a file read through LineStream
+ static if (false) { // disabled: the file name below is a path on the author's machine
+ import std.stdio;
+ import std.conv;
+ import std.utf;
+ //string fname = "C:\\projects\\d\\ddc\\ddclexer\\src\\ddc\\lexer\\LineStream.d";
+ //string fname = "/home/lve/src/d/ddc/ddclexer/" ~ __FILE__; //"/home/lve/src/d/ddc/ddclexer/src/ddc/lexer/Lexer.d";
+ //string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf8.d";
+ //string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf16be.d";
+ //string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf16le.d";
+ //string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf32be.d";
+ string fname = "/home/lve/src/d/ddc/ddclexer/tests/LineStream_utf32le.d";
+ writeln("opening file");
+ std.stream.File f = new std.stream.File(fname);
+ scope(exit) { f.close(); }
+ try {
+ LineStream lines = LineStream.create(f, fname);
+ for (;;) {
+ dchar[] s = lines.readLine();
+ if (s is null) // null means end of input or error; the error code is checked after the loop
+ break;
+ writeln("line " ~ to!string(lines.line()) ~ ":" ~ toUTF8(s));
+ }
+ if (lines.errorCode != 0) {
+ writeln("Error ", lines.errorCode, " ", lines.errorMessage, " -- at line ", lines.errorLine, " position ", lines.errorPos);
+ } else {
+ writeln("EOF reached");
+ }
+ } catch (Exception e) {
+ writeln("Exception " ~ e.toString);
+ }
+ }
+}
+// LAST LINE
diff --git a/src/ddc/lexer/SourceEncodingException.d b/src/ddc/lexer/SourceEncodingException.d
new file mode 100644
index 0000000..d84a1f6
--- /dev/null
+++ b/src/ddc/lexer/SourceEncodingException.d
@@ -0,0 +1,10 @@
+module ddc.lexer.SourceEncodingException;
+
+class SourceEncodingException : Exception // exception type carrying only a message
+{
+ this(string msg)
+ {
+ super(msg); // message is passed to Exception unchanged
+ }
+}
+
diff --git a/src/ddc/lexer/Tokenizer.d b/src/ddc/lexer/Tokenizer.d
new file mode 100644
index 0000000..fda1430
--- /dev/null
+++ b/src/ddc/lexer/Tokenizer.d
@@ -0,0 +1,2636 @@
+module ddc.lexer.Tokenizer;
+
+import ddc.lexer.textsource;
+import ddc.lexer.exceptions;
+
+import std.stdio;
+import std.datetime;
+import std.conv;
+import std.utf;
+import std.math;
+
+/**
+ * Token kind tag, stored in a single byte.
+ * Values are spelled out explicitly; they equal the implicit 0-based numbering.
+ * An EOL kind exists in the source only as a commented-out member and has no value.
+ */
+enum TokenType : ubyte {
+    EOF        = 0,
+    //EOL,
+    WHITESPACE = 1,
+    COMMENT    = 2,
+    IDENTIFIER = 3,
+    STRING     = 4,
+    CHARACTER  = 5,
+    INTEGER    = 6,
+    FLOAT      = 7,
+    KEYWORD    = 8,
+    OP         = 9
+}
+
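+// Bit table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex D) OR a..z OR A..Z OR _.
+// Bit (ch & 31) of word [ch >> 5] is set when code point ch qualifies; the max code covered is 0xd7ff.
+// 1728 words = 0xd800 code points / 32 bits per word.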
+const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
+ 0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
+ 0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
+ 0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
+ 0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
+ 0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
+ 0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
+ 0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
+ 0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
+ 0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
+ 0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
+ 0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
+ 0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
+ 0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
+ 0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
+ 0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
+ 0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
+ 0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
+ 0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
+ 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
+];
+
+/**
+ * Returns true if the character is A..Z, a..z, _ or a universal alpha,
+ * by looking up its bit in UNIVERSAL_ALPHA_FLAGS.
+ * Code points above 0xd7ff are outside the table and always yield false.
+ */
+public bool isUniversalAlpha(dchar ch) pure nothrow {
+    if (ch > 0xd7ff)
+        return false;
+    // 32 code points per table word: word index is ch / 32, bit index is ch % 32.
+    immutable uint word = UNIVERSAL_ALPHA_FLAGS[ch / 32];
+    immutable uint mask = 1u << (ch % 32);
+    return (word & mask) != 0;
+}
+
+/// Returns true if `ch` may start an identifier; this is exactly the isUniversalAlpha test.
+public bool isIdentStartChar(dchar ch) pure nothrow {
+    immutable bool startsIdentifier = isUniversalAlpha(ch);
+    return startsIdentifier;
+}
+
+/// Returns true if `ch` may appear after the first character of an identifier:
+/// an ASCII digit 0..9, or anything accepted by isUniversalAlpha.
+public bool isIdentMiddleChar(dchar ch) pure nothrow {
+    if ('0' <= ch && ch <= '9')
+        return true;
+    return isUniversalAlpha(ch);
+}
+
+immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
+static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
+ /// Single-value test: true when `ch` is exactly the code point `v`.
+ bool r(dchar ch, wchar v) pure nothrow {
+     immutable bool same = (v == ch);
+     return same;
+ }
+
+ /// Range test: true when `ch` lies in the inclusive range v1..v2.
+ bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
+     if (ch < v1)
+         return false;
+     return ch <= v2;
+ }
+
+ // Reference (slow) classifier: returns true when c matches one of the single code points or
+ // inclusive ranges listed below, tested one by one through the r() helpers.
+ // Each group of tests is preceded by a comment naming the script and listing the same ranges.
+ // ASCII letters and '_' are NOT part of this list; the unittest that uses this function
+ // ORs them in separately.
+ // NOTE(review): the list presumably transcribes the C99 universal-character-name table for
+ // identifiers -- verify against the standard before editing any range.
+ bool isUniversalAlphaSlow(dchar c) pure nothrow {
+ return
+ // Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
+ // 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
+ r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
+ || r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
+ //Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
+ //03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
+ //1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
+ //1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
+ //1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
+ || r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
+ || r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
+ || r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
+ || r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
+ || r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
+ //Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
+ //04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
+ || r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
+ || r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
+ //Armenian: 0531−0556, 0561−0587
+ || r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
+ //Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
+ //05F0−05F2
+ || r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
+ || r(c, 0x05F0,0x05F2)
+ //Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
+ //06D0−06DC, 06E5−06E8, 06EA−06ED
+ || r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
+ || r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
+ //Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
+ || r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
+ //Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
+ //09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
+ //09DC−09DD, 09DF−09E3, 09F0−09F1
+ || r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
+ || r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
+ || r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
+ //Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
+ //0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
+ //0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
+ || r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
+ || r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
+ || r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
+ //Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
+ //0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
+ //0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
+ || r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
+ || r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
+ || r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
+ // Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
+ //0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
+ //0B5C−0B5D, 0B5F−0B61
+ || r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
+ || r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
+ || r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
+ //Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
+ //0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
+ //0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
+ || r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
+ || r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
+ || r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
+ //Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
+ //0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
+ || r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
+ || r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
+ //Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
+ //0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
+ //0CE0−0CE1
+ || r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
+ || r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
+ || r(c, 0x0CE0,0x0CE1)
+ //Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
+ //0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
+ || r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
+ || r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
+ //Thai: 0E01−0E3A, 0E40−0E5B
+ || r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
+ //Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
+ //0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
+ //0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
+ //0EC8−0ECD, 0EDC−0EDD
+ || r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
+ || r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
+ || r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
+ || r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
+ //Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
+ //0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
+ //0FB1−0FB7, 0FB9
+ || r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
+ || r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
+ || r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
+ //Georgian: 10A0−10C5, 10D0−10F6
+ || r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
+ //Hiragana: 3041−3093, 309B−309C
+ || r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
+ //Katakana: 30A1−30F6, 30FB−30FC
+ || r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
+ //Bopomofo: 3105−312C
+ || r(c, 0x3105,0x312C)
+ //CJK Unified Ideographs: 4E00−9FA5
+ || r(c, 0x4E00,0x9FA5)
+ //Hangul: AC00−D7A3
+ || r(c, 0xAC00,0xD7A3)
+ //Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
+ //0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
+ //0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
+ || r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
+ || r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
+ || r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
+ //Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
+ //02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
+ //2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
+ //2133−2138, 2160−2182, 3005−3007, 3021−3029
+ || r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
+ || r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
+ || r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
+ || r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
+ ;
+ }
+
+}
+
+// Table generator / self-check for the universal-alpha lookup.
+// Everything below is compiled only when ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE is true:
+// it prints D source for the UNIVERSAL_ALPHA_FLAGS bit table and then compares
+// isUniversalAlpha() against the slow reference predicate.
+unittest {
+
+
+ static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
+ // number of 32-bit words printed per output row
+ immutable uint itemsInRow = 8;
+
+ // find the highest code below 0x10000 that must be flagged
+ uint maxAlpha = 0;
+ for (uint i = 0; i < 0x10000; i++) {
+ uint ch = i;
+ if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
+ maxAlpha = i;
+ }
+ // round up so that the table ends on a full row (itemsInRow words of 32 bits each)
+ maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1;
+ writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
+ writefln("// max code is 0x%04x", maxAlpha);
+ writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
+ // one 32-bit word per 32 consecutive codes; bit j of a word stands for code i + j
+ for (uint i = 0; i <= maxAlpha; i += 32) {
+ if ((i / 32) % itemsInRow == 0)
+ write(" ");
+ uint flags = 0;
+ for (uint j = 0; j < 32; j++) {
+ uint ch = i + j;
+ bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+ if (flag)
+ flags |= (1 << j);
+ }
+ writef("0x%08x", flags);
+ // no comma after the last word
+ if (i != maxAlpha / 32 * 32)
+ write(",");
+ // end of row: print the code range covered by this row as a trailing comment
+ if ((i / 32) % itemsInRow == itemsInRow - 1)
+ writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
+ }
+ writeln("];");
+
+ // cross-check the fast predicate against the slow one for every code below 0x100000
+ // NOTE(review): the upper bound is 0x100000, not 0x110000 - confirm this is intended
+ for (uint ch = 0; ch < 0x100000; ch++) {
+ bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+ bool flag2 = isUniversalAlpha(ch);
+ if (flag2 != flag) {
+ // repeated call, presumably a convenient place for a debugger breakpoint
+ isUniversalAlpha(ch);
+ writefln("universalAlpha test failed for char %06x expeced %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
+ }
+ assert(flag2 == flag);
+ }
+ }
+}
+
+/**
+ * Operator and punctuation codes.
+ *
+ * The numeric value of each member is used by getOpNameD() as an index into
+ * OP_CODE_STRINGS, so the member order here must stay in sync with that table.
+ */
+enum OpCode : ubyte {
+ NONE, // no op
+ DIV, // /
+ DIV_EQ, // /=
+ DOT, // .
+ DOT_DOT, // ..
+ DOT_DOT_DOT,// ...
+ AND, // &
+ AND_EQ, // &=
+ LOG_AND, // &&
+ OR, // |
+ OR_EQ, // |=
+ LOG_OR, // ||
+ MINUS, // -
+ MINUS_EQ, // -=
+ MINUS_MINUS,// --
+ PLUS, // +
+ PLUS_EQ, // +=
+ PLUS_PLUS, // ++
+ LT, // <
+ LT_EQ, // <=
+ SHL, // <<
+ SHL_EQ, // <<=
+ LT_GT, // <>
+ NE_EQ, // <>=
+ GT, // >
+ GT_EQ, // >=
+ SHR_EQ, // >>=
+ ASR_EQ, // >>>=
+ SHR, // >>
+ ASR, // >>>
+ NOT, // !
+ NOT_EQ, // !=
+ NOT_LT_GT, // !<>
+ NOT_LT_GT_EQ, // !<>=
+ NOT_LT, // !<
+ NOT_LT_EQ, // !<=
+ NOT_GT, // !>
+ NOT_GT_EQ, // !>=
+ PAR_OPEN, // (
+ PAR_CLOSE, // )
+ SQ_OPEN, // [
+ SQ_CLOSE, // ]
+ CURL_OPEN, // {
+ CURL_CLOSE, // }
+ QUEST, // ?
+ COMMA, // ,
+ SEMICOLON, // ;
+ COLON, // :
+ DOLLAR, // $
+ EQ, // =
+ QE_EQ, // ==
+ MUL, // *
+ MUL_EQ, // *=
+ MOD, // %
+ MOD_EQ, // %=
+ XOR, // ^
+ XOR_EQ, // ^=
+ LOG_XOR, // ^^
+ LOG_XOR_EQ, // ^^=
+ INV, // ~
+ INV_EQ, // ~=
+ AT, // @
+ EQ_GT, // =>
+ SHARP // #
+};
+
+/**
+ * Source text of every operator, indexed by the numeric value of OpCode
+ * (see getOpNameD). Entry order must match the OpCode declaration order.
+ */
+immutable dstring[] OP_CODE_STRINGS = [
+ "",
+ "/",
+ "/=",
+ ".",
+ "..",
+ "...",
+ "&",
+ "&=",
+ "&&",
+ "|",
+ "|=",
+ "||",
+ "-",
+ "-=",
+ "--",
+ "+",
+ "+=",
+ "++",
+ "<",
+ "<=",
+ "<<",
+ "<<=",
+ "<>",
+ "<>=",
+ ">",
+ ">=",
+ ">>=",
+ ">>>=",
+ ">>",
+ ">>>",
+ "!",
+ "!=",
+ "!<>",
+ "!<>=",
+ "!<",
+ "!<=",
+ "!>",
+ "!>=",
+ "(",
+ ")",
+ "[",
+ "]",
+ "{",
+ "}",
+ "?",
+ ",",
+ ";",
+ ":",
+ "$",
+ "=",
+ "==",
+ "*",
+ "*=",
+ "%",
+ "%=",
+ "^",
+ "^=",
+ "^^",
+ "^^=",
+ "~",
+ "~=",
+ "@",
+ "=>",
+ "#"
+];
+
+/**
+ * Returns the source text of an operator.
+ *
+ * Params:
+ *   op = operator code; its numeric value is used as the table index
+ * Returns: the matching entry of OP_CODE_STRINGS (empty string for OpCode.NONE)
+ */
+dstring getOpNameD(OpCode op) pure nothrow {
+    immutable size_t index = op;
+    return OP_CODE_STRINGS[index];
+}
+
+/**
+ * Keyword codes.
+ *
+ * The numeric value of each member is used by getKeywordNameD() and findKeyword()
+ * as an index into KEYWORD_STRINGS, so both lists must stay in the same order.
+ * Members are grouped by the first letter of the keyword; Tokenizer.detectKeyword
+ * passes the first and last member of a group to findKeyword(), which scans the
+ * whole range in declaration order - keep new keywords inside the proper group.
+ */
+enum Keyword : ubyte {
+ NONE,
+ ABSTRACT,
+ ALIAS,
+ ALIGN,
+ ASM,
+ ASSERT,
+ AUTO,
+
+ BODY,
+ BOOL,
+ BREAK,
+ BYTE,
+
+ CASE,
+ CAST,
+ CATCH,
+ CDOUBLE,
+ CENT,
+ CFLOAT,
+ CHAR,
+ CLASS,
+ CONST,
+ CONTINUE,
+ CREAL,
+
+ DCHAR,
+ DEBUG,
+ DEFAULT,
+ DELEGATE,
+ DELETE,
+ DEPRECATED,
+ DO,
+ DOUBLE,
+
+ ELSE,
+ ENUM,
+ EXPORT,
+ EXTERN,
+
+ FALSE,
+ FINAL,
+ FINALLY,
+ FLOAT,
+ FOR,
+ FOREACH,
+ FOREACH_REVERSE,
+ FUNCTION,
+
+ GOTO,
+
+ IDOUBLE,
+ IF,
+ IFLOAT,
+ IMMUTABLE,
+ IMPORT,
+ IN,
+ INOUT,
+ INT,
+ INTERFACE,
+ INVARIANT,
+ IREAL,
+ IS,
+
+ LAZY,
+ LONG,
+
+ MACRO,
+ MIXIN,
+ MODULE,
+
+ NEW,
+ NOTHROW,
+ NULL,
+
+ OUT,
+ OVERRIDE,
+
+ PACKAGE,
+ PRAGMA,
+ PRIVATE,
+ PROTECTED,
+ PUBLIC,
+ PURE,
+
+ REAL,
+ REF,
+ RETURN,
+
+ SCOPE,
+ SHARED,
+ SHORT,
+ STATIC,
+ STRUCT,
+ SUPER,
+ SWITCH,
+ SYNCHRONIZED,
+
+ TEMPLATE,
+ THIS,
+ THROW,
+ TRUE,
+ TRY,
+ TYPEDEF,
+ TYPEID,
+ TYPEOF,
+
+ UBYTE,
+ UCENT,
+ UINT,
+ ULONG,
+ UNION,
+ UNITTEST,
+ USHORT,
+
+ VERSION,
+ VOID,
+ VOLATILE,
+
+ WCHAR,
+ WHILE,
+ WITH,
+
+ // keywords starting with an underscore (see KEYWORD_STRINGS for the exact spelling)
+ FILE,
+ MODULE__,
+ LINE,
+ FUNCTION__,
+ PRETTY_FUNCTION,
+
+ //Special Token Replaced with
+ DATE, // string literal of the date of compilation "mmm dd yyyy"
+ EOF, // sets the scanner to the end of the file
+ TIME, // string literal of the time of compilation "hh:mm:ss"
+ TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
+ VENDOR, // Compiler vendor string, such as "Digital Mars D"
+ VERSION_, // Compiler version as an integer, such as 2001
+
+ GSHARED,
+ TRAITS,
+ VECTOR,
+ PARAMETERS,
+
+}
+
+/**
+ * Source spelling of every keyword, indexed by the numeric value of Keyword
+ * (see getKeywordNameD and findKeyword). Entry order must match the Keyword
+ * declaration order.
+ */
+immutable dstring[] KEYWORD_STRINGS = [
+ "",
+ "abstract",
+ "alias",
+ "align",
+ "asm",
+ "assert",
+ "auto",
+
+ "body",
+ "bool",
+ "break",
+ "byte",
+
+ "case",
+ "cast",
+ "catch",
+ "cdouble",
+ "cent",
+ "cfloat",
+ "char",
+ "class",
+ "const",
+ "continue",
+ "creal",
+
+ "dchar",
+ "debug",
+ "default",
+ "delegate",
+ "delete",
+ "deprecated",
+ "do",
+ "double",
+
+ "else",
+ "enum",
+ "export",
+ "extern",
+
+ "false",
+ "final",
+ "finally",
+ "float",
+ "for",
+ "foreach",
+ "foreach_reverse",
+ "function",
+
+ "goto",
+
+ "idouble",
+ "if",
+ "ifloat",
+ "immutable",
+ "import",
+ "in",
+ "inout",
+ "int",
+ "interface",
+ "invariant",
+ "ireal",
+ "is",
+
+ "lazy",
+ "long",
+
+ "macro",
+ "mixin",
+ "module",
+
+ "new",
+ "nothrow",
+ "null",
+
+ "out",
+ "override",
+
+ "package",
+ "pragma",
+ "private",
+ "protected",
+ "public",
+ "pure",
+
+ "real",
+ "ref",
+ "return",
+
+ "scope",
+ "shared",
+ "short",
+ "static",
+ "struct",
+ "super",
+ "switch",
+ "synchronized",
+
+ "template",
+ "this",
+ "throw",
+ "true",
+ "try",
+ "typedef",
+ "typeid",
+ "typeof",
+
+ "ubyte",
+ "ucent",
+ "uint",
+ "ulong",
+ "union",
+ "unittest",
+ "ushort",
+
+ "version",
+ "void",
+ "volatile",
+
+ "wchar",
+ "while",
+ "with",
+
+ "__FILE__",
+ "__MODULE__",
+ "__LINE__",
+ "__FUNCTION__",
+ "__PRETTY_FUNCTION__",
+
+ //Special Token Replaced with
+ "__DATE__", // string literal of the date of compilation "mmm dd yyyy"
+ "__EOF__", // sets the scanner to the end of the file
+ "__TIME__", // string literal of the time of compilation "hh:mm:ss"
+ "__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
+ "__VENDOR__", // Compiler vendor string, such as "Digital Mars D"
+ "__VERSION__", // Compiler version as an integer, such as 2001
+
+
+ "__gshared",
+ "__traits",
+ "__vector",
+ "__parameters"
+];
+
+/**
+ * Returns the source spelling of a keyword.
+ *
+ * Params:
+ *   keyword = keyword code; its numeric value is used as the table index
+ * Returns: the matching entry of KEYWORD_STRINGS (empty string for Keyword.NONE)
+ */
+public dstring getKeywordNameD(Keyword keyword) pure nothrow {
+    immutable size_t index = keyword;
+    return KEYWORD_STRINGS[index];
+}
+
+/**
+ * Tries to match one of the keywords in the range [start .. end] against the input.
+ *
+ * The first character of the keyword is not compared here (comparison starts at the
+ * second character), so the caller must have matched it already.
+ *
+ * Params:
+ *   start = first keyword of the range to try (inclusive)
+ *   end   = last keyword of the range to try (inclusive)
+ *   name  = points at the input character that follows the already matched first one
+ *   len   = number of characters readable through name
+ *   pos   = on success, advanced by the keyword length minus one
+ * Returns: the first keyword of the range that matches and is not followed by an
+ *          identifier character, or Keyword.NONE
+ */
+public Keyword findKeyword(Keyword start, Keyword end, dchar * name, uint len, ref uint pos) pure nothrow {
+    for (Keyword i = start; i <= end; i++) {
+        dstring s = KEYWORD_STRINGS[i];
+        if (s.length > len + 1)
+            continue; // too long
+        bool found = true;
+        for (uint j = 1; j < s.length; j++) {
+            if (s[j] != name[j - 1]) {
+                found = false;
+                break;
+            }
+        }
+        if (!found)
+            continue;
+        // The keyword occupies name[0 .. s.length - 1]. It is a whole word when it
+        // reaches the end of the input (s.length - 1 == len) or when the next
+        // character cannot continue an identifier. The length test must come first:
+        // name[s.length - 1] is out of range when the keyword ends the input.
+        if (s.length == len + 1 || !isIdentMiddleChar(name[s.length - 1])) {
+            pos += s.length - 1;
+            return i;
+        }
+    }
+    return Keyword.NONE;
+}
+
+/**
+ * Base class of all tokens.
+ *
+ * Holds the source file, line, position and token type. The value accessors
+ * (text, intValue, opCode, keyword, ...) return neutral defaults here; subclasses
+ * override the ones that apply to them.
+ */
+class Token {
+    protected SourceFile _file;
+    protected uint _line;
+    protected uint _pos;
+    protected TokenType _type;
+
+    // location and kind
+    public @property TokenType type() { return _type; }
+    public @property string filename() { return _file.filename; }
+    public @property uint line() { return _line; }
+    public @property uint pos() { return _pos; }
+
+    // value accessors, defaults for token kinds that carry no such value
+    public @property dchar[] text() { return null; }
+    public @property dchar literalType() { return 0; }
+    public @property ulong intValue() { return 0; }
+    public @property bool isUnsigned() { return false; }
+    // NOTE(review): declared as ulong although it is used as a flag; overrides
+    // must keep the same return type
+    public @property ulong isLong() { return false; }
+    public @property real realValue() { return 0; }
+    public @property double doubleValue() { return 0; }
+    public @property float floatValue() { return 0; }
+    public @property byte precision() { return 0; }
+    public @property bool isImaginary() { return false; }
+    public @property OpCode opCode() { return OpCode.NONE; }
+    public @property Keyword keyword() { return Keyword.NONE; }
+
+    /// Creates a token of the given type without location information.
+    this(TokenType type) {
+        _type = type;
+    }
+
+    /// Creates a token of the given type; line and pos are stored as passed.
+    this(TokenType type, SourceFile file, uint line, uint pos) {
+        _type = type;
+        _file = file;
+        _line = line;
+        _pos = pos;
+    }
+
+    /// Updates file and location; the stored position is pos + 1.
+    void setPos(SourceFile file, uint line, uint pos) {
+        _file = file;
+        _line = line;
+        _pos = pos + 1;
+    }
+
+    /// Replaces the source file reference only.
+    void setFile(SourceFile file) {
+        _file = file;
+    }
+
+    /// Updates the location; the stored position is pos + 1.
+    void setPos(uint line, uint pos) {
+        _line = line;
+        _pos = pos + 1;
+    }
+
+    /// Returns a new token object with the same content.
+    public abstract Token clone();
+
+    /// Debug representation: line:pos type opCode keyword "text".
+    public override @property string toString() {
+        string location = "" ~ to!string(_line) ~ ":" ~ to!string(_pos);
+        string kind = to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword);
+        return location ~ " " ~ kind
+            ~ " \"" ~ toUTF8(text()) ~ "\"";
+    }
+}
+
+/// Token that marks the end of the input.
+class EofToken : Token {
+    /// Creates an EOF token without location information.
+    this() {
+        super(TokenType.EOF);
+    }
+
+    /// Creates an EOF token at the given location.
+    this(SourceFile file, uint line, uint pos) {
+        super(TokenType.EOF, file, line, pos);
+    }
+
+    /// Returns a new EOF token with the same location.
+    override public Token clone() {
+        Token copy = new EofToken(_file, _line, _pos);
+        return copy;
+    }
+
+    public override @property string toString() {
+        enum string label = "EOF";
+        return label;
+    }
+}
+
+// treat as white space
+//class EolToken : Token {
+// this(string file, uint line, uint pos) {
+// super(TokenType.EOL, file, line, pos);
+// }
+//}
+
+/// Token for a run of whitespace.
+class WhiteSpaceToken : Token {
+    /// Creates a whitespace token without location information.
+    this() {
+        super(TokenType.WHITESPACE);
+    }
+
+    /// Creates a whitespace token at the given location.
+    this(SourceFile file, uint line, uint pos) {
+        super(TokenType.WHITESPACE, file, line, pos);
+    }
+
+    /// Returns a new whitespace token with the same location.
+    override public Token clone() {
+        Token copy = new WhiteSpaceToken(_file, _line, _pos);
+        return copy;
+    }
+
+    public override @property string toString() {
+        enum string label = "WhiteSpace";
+        return label;
+    }
+}
+
+/// Token for an operator or punctuation mark.
+class OpToken : Token {
+    OpCode _op;
+    public @property override OpCode opCode() { return _op; }
+    public @property void opCode(OpCode op) { _op = op; }
+    /// Operator spelling looked up through getOpNameD.
+    public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); }
+
+    /// Creates an operator token without location; the op code stays at its default.
+    this() {
+        super(TokenType.OP);
+    }
+
+    /// Creates an operator token at the given location; the op code stays at its default.
+    this(SourceFile file, uint line, uint pos) {
+        super(TokenType.OP, file, line, pos);
+    }
+
+    /// Returns a new token with the same location and the same operator code.
+    override public Token clone() {
+        OpToken copy = new OpToken(_file, _line, _pos);
+        // the constructor does not take the op code, so carry it over explicitly
+        copy._op = _op;
+        return copy;
+    }
+
+    public override @property string toString() {
+        return "Op:" ~ to!string(_op);
+    }
+}
+
+/// Token for a language keyword.
+class KeywordToken : Token {
+    Keyword _keyword;
+    public @property override Keyword keyword() { return _keyword; }
+    public @property void keyword(Keyword keyword) { _keyword = keyword; }
+    /// Keyword spelling looked up through getKeywordNameD.
+    public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); }
+
+    /// Creates a keyword token without location; the keyword stays at its default.
+    this() {
+        super(TokenType.KEYWORD);
+    }
+
+    /// Creates a keyword token at the given location; the keyword stays at its default.
+    this(SourceFile file, uint line, uint pos) {
+        super(TokenType.KEYWORD, file, line, pos);
+    }
+
+    /// Returns a new token with the same location and the same keyword code.
+    override public Token clone() {
+        KeywordToken copy = new KeywordToken(_file, _line, _pos);
+        // the constructor does not take the keyword, so carry it over explicitly
+        copy._keyword = _keyword;
+        return copy;
+    }
+
+    public override @property string toString() {
+        return "Keyword:" ~ to!string(_keyword);
+    }
+}
+
+// do we need comment text?
+
+/// Token for a comment; optionally carries the comment text.
+class CommentToken : Token {
+    dchar[] _text;
+    public @property override dchar[] text() { return _text; }
+    /// Stores the given slice as is (no copy is made).
+    public @property void text(dchar[] text) { _text = text; }
+
+    /// Creates a comment token without location and text.
+    this() {
+        super(TokenType.COMMENT);
+    }
+
+    /// Creates a comment token at the given location; text is stored as is.
+    this(SourceFile file, uint line, uint pos, dchar[] text) {
+        super(TokenType.COMMENT, file, line, pos);
+        _text = text;
+    }
+
+    /// Returns a new token with the same location and its own copy of the text.
+    override public Token clone() {
+        // _text may be a slice of a buffer that is overwritten later (the setter
+        // stores slices without copying), so the clone gets a private copy -
+        // the same way StringLiteralToken.clone does
+        return new CommentToken(_file, _line, _pos, _text.dup);
+    }
+
+    public override @property string toString() {
+        return "Comment:" ~ to!string(_text);
+    }
+}
+
+// numeric id assigned to an identifier string (see IdentHolder)
+alias tokenizer_ident_t = uint;
+// identifier text
+alias tokenizer_ident_name_t = dchar[];
+
+// reserved id meaning "no identifier"; IdentHolder hands out ids starting at NO_IDENT + 1
+enum : tokenizer_ident_t {
+ NO_IDENT = 0
+}
+
+/**
+ * Storage for identifier strings: maps every distinct identifier to a numeric id
+ * and back. Ids are handed out sequentially, starting at NO_IDENT + 1.
+ */
+class IdentHolder {
+    protected tokenizer_ident_t _nextId;
+    protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
+    protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;
+
+    public this() {
+        _nextId = NO_IDENT + 1;
+    }
+
+    /**
+     * Search for id by name, return NO_IDENT if not found.
+     */
+    uint findByName(tokenizer_ident_name_t name) {
+        tokenizer_ident_t * found = (name in _nameToId);
+        if (found)
+            return *found;
+        return NO_IDENT;
+    }
+
+    /**
+     * Search for name by id, return null if not found.
+     */
+    tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
+        auto found = (id in _idToName);
+        if (found)
+            return *found;
+        return null;
+    }
+
+    /**
+     * Search for ident id by name, create new entry if not found.
+     *
+     * A new entry stores its own copy of name, so the caller may reuse or
+     * overwrite the passed buffer afterwards.
+     */
+    tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
+        tokenizer_ident_t * found = (name in _nameToId);
+        if (found)
+            return *found;
+        tokenizer_ident_t newid = _nextId++;
+        // The map key is cast to an immutable string below. That is only sound
+        // for memory nobody else can modify, so keep a private copy instead of
+        // the caller's slice.
+        tokenizer_ident_name_t stored = name.dup;
+        _nameToId[cast(dstring)stored] = newid;
+        _idToName[newid] = stored;
+        return newid;
+    }
+}
+
+/**
+* Thread local storage for IDs.
+*
+* A module-level variable without shared/__gshared is thread-local in D, and the
+* module constructor below runs for each thread, so every thread gets its own
+* IdentHolder.
+*/
+IdentHolder identMap;
+
+static this() {
+ // init ID storage
+ identMap = new IdentHolder();
+}
+
+/// Token for a string literal: the text plus a literal type character.
+class StringLiteralToken : Token {
+    dchar[] _text;
+    dchar _literalType;
+
+    public @property override dchar literalType() { return _literalType; }
+    public @property override dchar[] text() { return _text; }
+
+    /// Replaces text and literal type; the text slice is stored without copying.
+    public void setText(dchar[] text, dchar type) {
+        _text = text;
+        _literalType = type;
+    }
+
+    /// Creates a string token without location, text and type.
+    this() {
+        super(TokenType.STRING);
+    }
+
+    /// Creates a string token at the given location; text is stored without copying.
+    this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
+        super(TokenType.STRING, file, line, pos);
+        _text = text;
+        _literalType = type;
+    }
+
+    /// Returns a new token with the same location and type and a copy of the text.
+    override public Token clone() {
+        dchar[] textCopy = _text.dup;
+        return new StringLiteralToken(_file, _line, _pos, textCopy, _literalType);
+    }
+
+    public override @property string toString() {
+        string content = to!string(_text);
+        return "String:" ~ content;
+    }
+}
+
+/// Token for an integer literal: the value plus unsigned / long suffix flags.
+class IntegerLiteralToken : Token {
+    ulong _value;
+    bool _unsigned;
+    bool _long;
+
+    public @property override ulong intValue() { return _value; }
+    public @property override bool isUnsigned() { return _unsigned; }
+    public @property override ulong isLong() { return _long; }
+    /// Decimal representation of the value.
+    public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); }
+
+    /// Sets the value and both flags.
+    public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
+        _value = value;
+        _unsigned = unsignedFlag;
+        _long = longFlag;
+    }
+
+    /// Sets both flags and leaves the value untouched.
+    public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
+        _unsigned = unsignedFlag;
+        _long = longFlag;
+    }
+
+    /// Creates an integer token without location; value and flags keep their defaults.
+    this() {
+        super(TokenType.INTEGER);
+    }
+
+    /// Creates an integer token at the given location with the given value and flags.
+    this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
+        super(TokenType.INTEGER, file, line, pos);
+        _value = value;
+        _unsigned = unsignedFlag;
+        _long = longFlag;
+    }
+
+    /// Returns a new token with the same location, value and flags.
+    override public Token clone() {
+        Token copy = new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
+        return copy;
+    }
+
+    /// Debug representation: the value followed by "L" and/or "U".
+    public override @property string toString() {
+        string suffix = (_long ? "L" : "") ~ (_unsigned ? "U" : "");
+        return "Integer:" ~ to!string(_value) ~ suffix;
+    }
+}
+
+/// Token for a floating point literal: the value, a precision code and an imaginary flag.
+class RealLiteralToken : Token {
+    real _value;
+    // toString prints 0 as suffix "f" and 2 as suffix "L"; the setters default to 1
+    byte _precision;
+    bool _imaginary;
+    public @property override ulong intValue() { return to!long(_value); }
+    public @property override real realValue() { return _value; }
+    public @property override double doubleValue() { return cast(double)_value; }
+    public @property override float floatValue() { return cast(float)_value; }
+    public @property override byte precision() { return _precision; }
+    public @property override bool isImaginary() { return _imaginary; }
+    public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); }
+
+    /// Sets value, precision and imaginary flag.
+    public void setValue(real value, byte precision = 1, bool imaginary = false) {
+        _value = value;
+        _precision = precision;
+        _imaginary = imaginary;
+    }
+
+    /// Sets precision and imaginary flag and leaves the value untouched.
+    public void setFlags(byte precision = 1, bool imaginary = false) {
+        _precision = precision;
+        _imaginary = imaginary;
+    }
+
+    /// Creates a floating point token without location; fields keep their defaults.
+    this() {
+        super(TokenType.FLOAT);
+    }
+
+    /// Creates a floating point token at the given location.
+    this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
+        super(TokenType.FLOAT, file, line, pos);
+        _value = value;
+        _precision = precision;
+        _imaginary = imaginary;
+    }
+
+    /// Returns a new token with the same location, value and flags.
+    override public Token clone() {
+        return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
+    }
+
+    /// Debug representation; labelled "Real:" so that dumps do not confuse it
+    /// with IntegerLiteralToken, which prints "Integer:".
+    public override @property string toString() {
+        return "Real:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
+    }
+}
+
+/// Token for an identifier; stores the id assigned by identMap instead of the text.
+class IdentToken : Token {
+    tokenizer_ident_t _id;
+
+    /// Identifier text looked up in identMap by id.
+    public @property override dchar[] text() {
+        return identMap.nameById(_id);
+    }
+
+    /// Registers text in identMap (if needed) and remembers its id.
+    public void setText(dchar[] text) {
+        _id = identMap.idByName(text);
+    }
+
+    /// Creates an identifier token without location and id.
+    this() {
+        super(TokenType.IDENTIFIER);
+    }
+
+    /// Creates an identifier token at the given location for the given text.
+    this(SourceFile file, uint line, uint pos, dchar[] text) {
+        super(TokenType.IDENTIFIER, file, line, pos);
+        _id = identMap.idByName(text);
+    }
+
+    /// Creates an identifier token at the given location for an already known id.
+    this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
+        super(TokenType.IDENTIFIER, file, line, pos);
+        _id = id;
+    }
+
+    /// Returns a new token with the same location and id.
+    override public Token clone() {
+        Token copy = new IdentToken(_file, _line, _pos, _id);
+        return copy;
+    }
+
+    public override @property string toString() {
+        string name = to!string(text);
+        return "Ident:" ~ name;
+    }
+}
+
+// Reusable append buffer, to avoid extra heap allocations.
+// The buffer is kept between uses: reset() only rewinds the length, and get()
+// returns a slice of the internal buffer rather than a copy.
+struct StringAppender {
+    dchar[] buf;
+    uint len;
+
+    // Resizes buf to twice (required + current size), but to at least 128 items.
+    private void grow(size_t required) {
+        uint newsize = cast(uint)((required + buf.length) * 2);
+        buf.length = newsize < 128 ? 128 : newsize;
+    }
+
+    // Returns the appended content as a slice of the internal buffer.
+    dchar[] get() {
+        return buf[0 .. len];
+    }
+
+    // Appends a single '\n'.
+    void appendEol() {
+        immutable size_t required = len + 1;
+        if (required > buf.length)
+            grow(required);
+        buf[len] = '\n';
+        len++;
+    }
+
+    // Appends a copy of s; empty input is ignored.
+    void append(dchar[] s) {
+        if (s.length == 0)
+            return;
+        immutable size_t required = len + s.length;
+        if (required > buf.length)
+            grow(required);
+        buf[len .. len + s.length] = s;
+        len += s.length;
+    }
+
+    // Forgets the content but keeps the allocated buffer.
+    void reset() {
+        len = 0;
+    }
+}
+
+class Tokenizer
+{
+ SourceLines _lineStream;
+ dchar[] _lineText;
+ uint _line; // current line number
+ uint _len; // current line length
+ uint _pos; // current line read position
+ uint _state; // tokenizer state
+
+ enum : int {
+ EOF_CHAR = 0x001A,
+ EOL_CHAR = 0x000A
+ };
+
+ WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
+ CommentToken _sharedCommentToken = new CommentToken();
+ StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
+ IdentToken _sharedIdentToken = new IdentToken();
+ OpToken _sharedOpToken = new OpToken();
+ KeywordToken _sharedKeywordToken = new KeywordToken();
+ IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
+ RealLiteralToken _sharedRealToken = new RealLiteralToken();
+ StringAppender _stringLiteralAppender;
+ StringAppender _commentAppender;
+ StringAppender _identAppender;
+
+ // when false, the comment scanners skip collecting comment text
+ bool _enableCommentText = true;
+ /// Turns collecting of comment text on or off (on by default).
+ public void enableCommentText(bool enabled) {
+ _enableCommentText = enabled;
+ }
+
+ /// Creates a tokenizer that reads from the given line stream (see init).
+ this(SourceLines lineStream) {
+ init(lineStream);
+ }
+
+    /**
+     * Attaches the tokenizer to a line stream and resets the read position.
+     *
+     * Every shared (reused) token gets the file of the stream, buildTime is set
+     * to the current time, and the current line text is dropped so that the next
+     * read fetches a fresh line.
+     */
+    void init(SourceLines lineStream) {
+        _lineStream = lineStream;
+        // all reusable token instances report the same source file
+        Token[] sharedTokens = [
+            cast(Token)_sharedWhiteSpaceToken,
+            _sharedCommentToken,
+            _sharedStringLiteralToken,
+            _sharedIdentToken,
+            _sharedOpToken,
+            _sharedKeywordToken,
+            _sharedIntegerToken,
+            _sharedRealToken
+        ];
+        foreach (Token token; sharedTokens)
+            token.setFile(_lineStream.file);
+        buildTime = Clock.currTime();
+        _line = lineStream.line;
+        _pos = 0;
+        _lineText = null;
+    }
+
+ /// Creates a tokenizer for source text held in memory, wrapped into an ArraySourceLines.
+ this(string code, string filename = "") {
+ this(new ArraySourceLines(code, filename));
+ }
+
+    /**
+     * Fetches the next line from the source stream into _lineText.
+     *
+     * Returns: true if a line was read; false at the end of the stream (position
+     *          and length are reset to 0 in that case)
+     * Throws: SourceEncodingException if the stream returned no line and reports
+     *         a non-zero error code
+     */
+    bool nextLine() {
+        _lineText = _lineStream.readLine();
+        immutable bool gotLine = _lineText !is null;
+        if (!gotLine && _lineStream.errorCode != 0)
+            throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file.filename, _lineStream.errorLine, _lineStream.errorPos);
+        if (gotLine)
+            _line = _lineStream.line;
+        _pos = 0;
+        // do not support lines longer that 4Gb
+        _len = gotLine ? cast(uint)_lineText.length : 0;
+        return gotLine;
+    }
+
+    /**
+     * Reads the next character and advances the read position.
+     *
+     * Returns: the next character of the current line; EOL_CHAR when the current
+     *          line is exhausted and another line was loaded; EOF_CHAR when no
+     *          more lines are available
+     */
+    dchar nextChar() {
+        if (_lineText is null) {
+            // nothing loaded yet (or the previous read hit the end): fetch a line
+            if (!nextLine()) {
+                return EOF_CHAR;
+            }
+        }
+        // Checked after the initial load as well: a freshly loaded line can be
+        // empty, and indexing it would be out of range.
+        if (_pos >= _len) {
+            if (!nextLine()) {
+                return EOF_CHAR;
+            }
+            return EOL_CHAR;
+        }
+        return _lineText[_pos++];
+    }
+
+    /**
+     * Returns the next character without consuming it.
+     *
+     * Loads the first line if nothing has been read yet, but never moves past
+     * the end of the current line.
+     *
+     * Returns: the character at the read position; EOL_CHAR at the end of the
+     *          current line; EOF_CHAR when no line is available
+     */
+    dchar peekChar() {
+        if (_lineText is null) {
+            if (!nextLine()) {
+                return EOF_CHAR;
+            }
+        }
+        if (_pos >= _len)
+            return EOL_CHAR;
+        // peek only: the read position must stay where it is
+        return _lineText[_pos];
+    }
+
+ /// Returns a newly allocated EofToken for the current file, line and position.
+ Token emitEof() {
+ // TODO: check for current state
+ return new EofToken(_lineStream.file, _line, _pos);
+ }
+
+    /**
+     * Consumes a run of blanks (space, tab, vertical tab, form feed), continuing
+     * over line ends as long as more lines are available.
+     *
+     * Params:
+     *   firstChar = not used
+     * Returns: the shared whitespace token, positioned at the start of the run
+     */
+    Token processWhiteSpace(dchar firstChar) {
+        // the token starts one character before the current read position
+        // (presumably the first blank was already consumed by the caller)
+        immutable uint startLine = _line;
+        immutable uint startPos = _pos - 1;
+        do {
+            while (_pos < _len) {
+                immutable dchar ch = _lineText[_pos];
+                immutable bool blank = ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C;
+                if (!blank)
+                    break;
+                _pos++;
+            }
+            // stopped inside the line: done; line exhausted: go on with the next one, if any
+        } while (_pos >= _len && nextLine());
+        // reuse the same token instance, to avoid extra heap spamming
+        _sharedWhiteSpaceToken.setPos(startLine, startPos);
+        return _sharedWhiteSpaceToken;
+    }
+
+    /**
+     * Consumes the rest of the current line as a comment.
+     *
+     * When comment text is enabled, the token text is the slice of the line that
+     * starts one character after the read position (no copy is made).
+     *
+     * Returns: the shared comment token
+     */
+    Token processOneLineComment() {
+        CommentToken token = _sharedCommentToken;
+        token.setPos(_line, _pos - 1);
+        if (_enableCommentText)
+            token.text = _lineText[_pos + 1 .. _lineText.length];
+        // nothing else can follow on this line
+        _pos = _len;
+        return token;
+    }
+
+    /**
+     * Comment /* ... */ - may span several lines.
+     *
+     * Scanning starts one character after the read position and stops after the
+     * closing marker, or at the end of the input if the comment is not closed.
+     * When comment text is enabled, the token text is the content between the
+     * markers, with '\n' between lines.
+     *
+     * Returns: the shared comment token
+     */
+    Token processMultilineComment() {
+        _sharedCommentToken.setPos(_line, _pos - 1);
+        _commentAppender.reset();
+        uint textStart = _pos + 1;
+        for (;;) {
+            uint textEnd = uint.max;
+            // "i + 1 < _len" instead of "i < _len - 1": _len is unsigned, so
+            // _len - 1 wraps around on an empty line and the scan would run
+            // past the end of the line
+            for (uint i = textStart; i + 1 < _len; i++) {
+                if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
+                    textEnd = i;
+                    break;
+                }
+            }
+            if (textEnd != uint.max) {
+                if (_enableCommentText)
+                    _commentAppender.append(_lineText[textStart .. textEnd]);
+                _pos = textEnd + 2;
+                break;
+            }
+            // not closed on this line: keep its text before the line is replaced
+            if (_enableCommentText && textStart < _len)
+                _commentAppender.append(_lineText[textStart .. _len]);
+            if (!nextLine()) {
+                // TODO: do we need throw exception if comment not closed by end of file?
+                _pos = _len;
+                break;
+            }
+            if (_enableCommentText)
+                _commentAppender.appendEol();
+            textStart = 0;
+        }
+        if (_enableCommentText) {
+            _sharedCommentToken.text = _commentAppender.get();
+        }
+        return _sharedCommentToken;
+    }
+
+    /**
+     * Nesting comment (slash-plus ... plus-slash) - may span several lines and
+     * contain further nesting comments.
+     *
+     * Scanning starts one character after the read position and stops after the
+     * marker that closes the outermost level, or at the end of the input if the
+     * comment is not closed. When comment text is enabled, the token text is the
+     * content between the outermost markers, with '\n' between lines.
+     *
+     * Returns: the shared comment token
+     */
+    Token processNestedComment() {
+        _sharedCommentToken.setPos(_line, _pos - 1);
+        _commentAppender.reset();
+        uint textStart = _pos + 1;
+        int level = 1;
+        for (;;) {
+            uint textEnd = uint.max;
+            // "i + 1 < _len" instead of "i < _len - 1": _len is unsigned, so
+            // _len - 1 wraps around on an empty line and the scan would run
+            // past the end of the line
+            for (uint i = textStart; i + 1 < _len; i++) {
+                if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
+                    // nested opening marker; skip its second character
+                    level++;
+                    i++;
+                } else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
+                    if (--level == 0) {
+                        textEnd = i;
+                        break;
+                    }
+                }
+            }
+            if (textEnd != uint.max) {
+                if (_enableCommentText)
+                    _commentAppender.append(_lineText[textStart .. textEnd]);
+                _pos = textEnd + 2;
+                break;
+            }
+            // not closed on this line: keep its text before the line is replaced
+            if (_enableCommentText && textStart < _len)
+                _commentAppender.append(_lineText[textStart .. _len]);
+            if (!nextLine()) {
+                // TODO: do we need throw exception if comment not closed by end of file?
+                _pos = _len;
+                break;
+            }
+            if (_enableCommentText)
+                _commentAppender.appendEol();
+            textStart = 0;
+        }
+        if (_enableCommentText) {
+            _sharedCommentToken.text = _commentAppender.get();
+        }
+        return _sharedCommentToken;
+    }
+
+ // Hex string literal - not implemented yet.
+ // Skips one character and returns null, so no token is produced.
+ Token processHexString() {
+ _pos++;
+ // TODO:
+ return null;
+ }
+
+ // Delimited string literal - not implemented yet.
+ // Skips one character and returns null, so no token is produced.
+ Token processDelimitedString() {
+ _pos++;
+ // TODO:
+ return null;
+ }
+
+ // r"string" or `string`
+ // Not implemented yet: skips one character and returns null, so no token is
+ // produced; the ch argument is not used.
+ Token processWysiwygString(dchar ch) {
+ _pos++;
+ // TODO:
+ return null;
+ }
+
+    /**
+     * Consumes an identifier. It starts one character before the read position
+     * and extends up to the first character that is not an identifier character,
+     * or to the end of the line.
+     *
+     * Returns: the shared identifier token with the identifier text set
+     */
+    Token processIdent() {
+        _sharedIdentToken.setPos(_line, _pos - 1);
+        _identAppender.reset();
+        immutable uint first = _pos - 1;
+        // default: the identifier runs to the end of the line
+        uint last = _len;
+        foreach (uint i; _pos .. _len) {
+            if (!isIdentMiddleChar(_lineText[i])) {
+                last = i;
+                break;
+            }
+        }
+        _pos = last;
+        _sharedIdentToken.setText(_lineText[first .. last]);
+        return _sharedIdentToken;
+    }
+
+    /**
+     * Consumes an optional integer suffix after the digits: L or U in either
+     * case, alone or combined in any order (one of each at most), and stores the
+     * flags in the shared integer token.
+     *
+     * Reports a parser error if an identifier character follows the number.
+     *
+     * Returns: the shared integer token
+     */
+    Token processIntegerSuffix() {
+        if (_pos >= _len)
+            return _sharedIntegerToken;
+        immutable dchar first = _lineText[_pos];
+        immutable dchar second = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
+        immutable bool firstIsLong = first == 'l' || first == 'L';
+        immutable bool firstIsUnsigned = first == 'u' || first == 'U';
+        bool longFlag = false;
+        bool unsignedFlag = false;
+        if (firstIsLong) {
+            longFlag = true;
+            unsignedFlag = second == 'u' || second == 'U';
+        } else if (firstIsUnsigned) {
+            unsignedFlag = true;
+            longFlag = second == 'l' || second == 'L';
+        }
+        // every recognized suffix letter takes one character
+        if (longFlag)
+            _pos++;
+        if (unsignedFlag)
+            _pos++;
+        _sharedIntegerToken.setFlags(unsignedFlag, longFlag);
+        immutable dchar following = _pos < _len ? _lineText[_pos] : 0;
+        if (isIdentMiddleChar(following))
+            parserError("Unexpected character after number");
+        return _sharedIntegerToken;
+    }
+
+    /**
+     * Consumes the digits of a binary integer literal, then an optional suffix.
+     *
+     * One character is skipped first (presumably the 'b' of the prefix - confirm
+     * against the caller). Underscores between digits are ignored, as in the hex,
+     * octal and decimal scanners.
+     *
+     * Returns: the shared integer token
+     */
+    Token processBinaryNumber() {
+        _sharedIntegerToken.setPos(_line, _pos - 1);
+        _pos++;
+        if (_pos >= _len)
+            parserError("Unexpected end of line in binary number");
+        int digits = 0;
+        ulong number = 0;
+        uint i = _pos;
+        for (;i < _len; i++) {
+            dchar ch = _lineText[i];
+            if (ch == '_')
+                continue; // digit separator
+            if (ch != '0' && ch != '1')
+                break;
+            number = (number << 1) | (ch == '1' ? 1 : 0);
+            digits++;
+        }
+        _pos = i;
+        if (digits > 64)
+            parserError("number is too big");
+        _sharedIntegerToken.setValue(number);
+        return processIntegerSuffix();
+    }
+
+    /**
+     * Consumes the digits of a hexadecimal integer literal, then an optional suffix.
+     *
+     * One character is skipped first (presumably the 'x' of the prefix - confirm
+     * against the caller). Underscores between digits are ignored. More than 16
+     * digits are reported as a parser error.
+     *
+     * Returns: the shared integer token
+     */
+    Token processHexNumber() {
+        _sharedIntegerToken.setPos(_line, _pos - 1);
+        _sharedRealToken.setPos(_line, _pos - 1);
+        _pos++;
+        if (_pos >= _len)
+            parserError("Unexpected end of line in hex number");
+        int digitCount = 0;
+        ulong value = 0;
+        uint i = _pos;
+    scan:
+        for (; i < _len; i++) {
+            immutable dchar ch = _lineText[i];
+            uint digit = 0;
+            switch (ch) {
+                case '0': .. case '9':
+                    digit = ch - '0';
+                    break;
+                case 'a': .. case 'f':
+                    digit = ch - 'a' + 10;
+                    break;
+                case 'A': .. case 'F':
+                    digit = ch - 'A' + 10;
+                    break;
+                case '_':
+                    continue scan; // digit separator
+                default:
+                    break scan; // end of the number
+            }
+            value = (value << 4) | digit;
+            digitCount++;
+        }
+        _pos = i;
+        if (digitCount > 16)
+            parserError("number is too big to fit 64 bits");
+        _sharedIntegerToken.setValue(value);
+        return processIntegerSuffix();
+    }
+
+    /**
+     * Consumes the digits of an octal integer literal starting at the read
+     * position, then an optional suffix. Underscores between digits are ignored.
+     *
+     * A value that does not fit into 64 bits is reported as a parser error.
+     *
+     * Returns: the shared integer token
+     */
+    Token processOctNumber() {
+        _sharedIntegerToken.setPos(_line, _pos - 1);
+        if (_pos >= _len)
+            parserError("Unexpected end of line in octal number");
+        ulong number = 0;
+        uint i = _pos;
+        bool overflow = false;
+        for (;i < _len; i++) {
+            dchar ch = _lineText[i];
+            uint digit = 0;
+            if (ch >= '0' && ch <= '7')
+                digit = ch - '0';
+            else if (ch == '_')
+                continue;
+            else
+                break;
+            // The shift below drops the top three bits, so they have to be
+            // checked before shifting; afterwards the loss can no longer be seen.
+            if ((number >> 61) != 0) {
+                overflow = true;
+                break;
+            }
+            number = (number << 3) | digit;
+        }
+        _pos = i;
+        if (overflow)
+            parserError("number is too big to fit 64 bits");
+        _sharedIntegerToken.setValue(number);
+        return processIntegerSuffix();
+    }
+
+ // Stores the value in the shared real token and returns that token.
+ // Suffix characters are not parsed yet (see the TODO below).
+ Token processDecFloatSuffix(real value) {
+ _sharedRealToken.setValue(value);
+ // TODO
+ return _sharedRealToken;
+ }
+
+    /**
+     * Parses the exponent of a decimal floating point literal; called with the
+     * read position just after the E character.
+     *
+     * Accepts an optional sign followed by decimal digits (underscores are
+     * ignored), scales value by ten to that power and continues with the suffix.
+     * A missing or oversized exponent is reported as a parser error.
+     *
+     * Params:
+     *   value = mantissa parsed so far
+     * Returns: the shared real token
+     */
+    Token processDecFloatExponent(real value) {
+        dchar next = _pos < _len ? _lineText[_pos] : 0;
+        int sign = 1;
+        if (next == '+') {
+            _pos++;
+        } else if (next == '-') {
+            _pos++;
+            sign = -1;
+        }
+        if (_pos >= _len)
+            parserError("Invalid exponent");
+        ulong digits = 0;
+        ulong number = 0;
+        uint i = _pos;
+        bool overflow = false;
+        for (;i < _len; i++) {
+            dchar ch = _lineText[i];
+            uint digit = 0;
+            if (ch >= '0' && ch <= '9')
+                digit = ch - '0';
+            else if (ch == '_')
+                continue;
+            else
+                break;
+            // check before multiplying; the bound is long.max because the
+            // exponent is converted to long below
+            if (number > cast(ulong)(long.max - digit) / 10) {
+                overflow = true;
+                break;
+            }
+            number = number * 10 + digit;
+            digits++;
+        }
+        if (digits == 0)
+            parserError("Invalid exponent");
+        _pos = i;
+        if (overflow)
+            parserError("Exponent is too big");
+        value *= pow(10., cast(long)number * sign);
+        return processDecFloatSuffix(value);
+    }
+
+    /**
+     * Parses the fractional digits of a decimal floating point literal, starting
+     * at the read position, and continues with the exponent or the suffix.
+     *
+     * Underscores are ignored. Fractional digits beyond what the 64-bit divider
+     * can represent are consumed but do not contribute to the value.
+     *
+     * Params:
+     *   firstPart = integer part of the literal
+     * Returns: the shared real token
+     */
+    Token processDecFloatSecondPart(ulong firstPart) {
+        if (_pos >= _len) {
+            // nothing after the integer part
+            _sharedRealToken.setValue(cast(real)firstPart);
+            return _sharedRealToken;
+        }
+        ulong divider = 1;
+        ulong number = 0;
+        uint i = _pos;
+        while (i < _len) {
+            immutable dchar ch = _lineText[i];
+            immutable bool isDigit = ch >= '0' && ch <= '9';
+            if (!isDigit && ch != '_')
+                break;
+            i++;
+            if (!isDigit)
+                continue; // digit separator
+            if (divider * 10 < divider)
+                continue; // ignore extra digits
+            number = number * 10 + (ch - '0');
+            divider *= 10;
+        }
+        _pos = i;
+        real value = cast(real)firstPart + (cast(real)number / divider);
+        immutable dchar next = _pos < _len ? _lineText[_pos] : 0;
+        switch (next) {
+            case 0:
+                // neither exponent nor suffix
+                _sharedRealToken.setValue(value);
+                return _sharedRealToken;
+            case 'e':
+            case 'E':
+                _pos++;
+                return processDecFloatExponent(value);
+            default:
+                return processDecFloatSuffix(value);
+        }
+    }
+
+    /**
+     * Parses a decimal number. The read position is moved back by one character
+     * first, so parsing starts at the character before it (presumably the first
+     * digit, already consumed by the caller).
+     *
+     * Digits (with underscores ignored) form the integer part. A following '.'
+     * starts the fractional part, unless it is the first character of "..";
+     * otherwise an optional integer suffix is parsed. A value that does not fit
+     * into 64 bits is reported as a parser error.
+     *
+     * Params:
+     *   c = not used
+     * Returns: the shared integer token or the shared real token
+     */
+    Token processDecNumber(dchar c) {
+        _pos--;
+        _sharedIntegerToken.setPos(_line, _pos);
+        _sharedRealToken.setPos(_line, _pos);
+        if (_pos >= _len)
+            parserError("Unexpected end of line in number");
+        ulong number = 0;
+        uint i = _pos;
+        bool overflow = false;
+        for (;i < _len; i++) {
+            dchar ch = _lineText[i];
+            uint digit = 0;
+            if (ch >= '0' && ch <= '9')
+                digit = ch - '0';
+            else if (ch == '_')
+                continue;
+            else
+                break;
+            // Check before multiplying: number * 10 + digit must not exceed
+            // ulong.max. Testing after the multiplication either misses the
+            // overflow or rejects values that still fit.
+            if (number > (ulong.max - digit) / 10) {
+                overflow = true;
+                break;
+            }
+            number = number * 10 + digit;
+        }
+        _pos = i;
+        if (overflow)
+            parserError("number is too big to fit 64 bits");
+        _sharedIntegerToken.setValue(number);
+        dchar next = _pos < _len ? _lineText[_pos] : 0;
+        if (next == 0)
+            return _sharedIntegerToken;
+        if (next == '.') {
+            // "1..2" is an integer followed by the ".." operator, not a float
+            dchar afterDot = _pos + 1 < _len ? _lineText[_pos + 1] : 0;
+            if (afterDot != '.') {
+                _pos++;
+                return processDecFloatSecondPart(number);
+            }
+        }
+        return processIntegerSuffix();
+    }
+
+ /// Throws a ParserException with the given message, tagged with the current
+ /// file name, line and read position.
+ void parserError(string msg) {
+ throw new ParserException(msg, _lineStream.file.filename, _line, _pos);
+ }
+
+    /**
+     * Checks whether a keyword starts with the character ch, followed by the
+     * text at the read position.
+     *
+     * The first character selects a group of keywords (all keywords with that
+     * initial, as laid out in the Keyword enum); findKeyword then compares the
+     * remaining characters and advances the read position on a match.
+     *
+     * Params:
+     *   ch = first character of the candidate word
+     * Returns: the keyword found, or Keyword.NONE
+     */
+    Keyword detectKeyword(dchar ch) {
+        if (ch > 'z')
+            return Keyword.NONE;
+        // first and last keyword of the group selected by the initial letter
+        Keyword first;
+        Keyword last;
+        switch (cast(ubyte)ch) {
+            case 'a': first = Keyword.ABSTRACT; last = Keyword.AUTO; break;
+            case 'b': first = Keyword.BODY; last = Keyword.BYTE; break;
+            case 'c': first = Keyword.CASE; last = Keyword.CREAL; break;
+            case 'd': first = Keyword.DCHAR; last = Keyword.DOUBLE; break;
+            case 'e': first = Keyword.ELSE; last = Keyword.EXTERN; break;
+            case 'f': first = Keyword.FALSE; last = Keyword.FUNCTION; break;
+            case 'g': first = Keyword.GOTO; last = Keyword.GOTO; break;
+            case 'i': first = Keyword.IDOUBLE; last = Keyword.IS; break;
+            case 'l': first = Keyword.LAZY; last = Keyword.LONG; break;
+            case 'm': first = Keyword.MACRO; last = Keyword.MODULE; break;
+            case 'n': first = Keyword.NEW; last = Keyword.NULL; break;
+            case 'o': first = Keyword.OUT; last = Keyword.OVERRIDE; break;
+            case 'p': first = Keyword.PACKAGE; last = Keyword.PURE; break;
+            case 'r': first = Keyword.REAL; last = Keyword.RETURN; break;
+            case 's': first = Keyword.SCOPE; last = Keyword.SYNCHRONIZED; break;
+            case 't': first = Keyword.TEMPLATE; last = Keyword.TYPEOF; break;
+            case 'u': first = Keyword.UBYTE; last = Keyword.USHORT; break;
+            case 'v': first = Keyword.VERSION; last = Keyword.VOLATILE; break;
+            case 'w': first = Keyword.WCHAR; last = Keyword.WITH; break;
+            // all keywords starting with an underscore, from FILE to PARAMETERS
+            case '_': first = Keyword.FILE; last = Keyword.PARAMETERS; break;
+            default: return Keyword.NONE;
+        }
+        // number of characters left on the line after the read position
+        immutable uint remaining = _len - _pos;
+        return findKeyword(first, last, _lineText.ptr + _pos, remaining, _pos);
+    }
+ /**
+     Detects the operator which starts with the already consumed character ch.
+
+     Looks ahead in the current line (up to three characters) and selects the longest matching
+     operator. For a multi-character operator _pos is advanced past its remaining characters.
+
+     Params: ch = first character of the operator; _pos already points to the character after it
+     Returns: code of the recognized operator, OpCode.NONE if ch does not start an operator
+ */
+ OpCode detectOp(dchar ch) nothrow {
+     if (ch >= 128)
+         return OpCode.NONE; // all operators consist of ASCII characters only
+     // character located `offset` positions after _pos, 0 if the line ends before it
+     // (written as _pos + offset < _len to avoid the unsigned underflow of _len - 1 for short lines)
+     dchar peek(uint offset) nothrow {
+         return _pos + offset < _len ? _lineText[_pos + offset] : 0;
+     }
+     // consumes `extra` more characters belonging to the operator and returns its code
+     OpCode take(OpCode code, uint extra) nothrow {
+         _pos += extra;
+         return code;
+     }
+     dchar ch2 = peek(0);
+     dchar ch3 = peek(1);
+     switch (cast(ubyte)ch) {
+         // /  /=
+         case '/':
+             return ch2 == '=' ? take(OpCode.DIV_EQ, 1) : OpCode.DIV;
+         // .  ..  ...
+         case '.':
+             if (ch2 == '.')
+                 return ch3 == '.' ? take(OpCode.DOT_DOT_DOT, 2) : take(OpCode.DOT_DOT, 1);
+             return OpCode.DOT;
+         // &  &=  &&
+         case '&':
+             if (ch2 == '=')
+                 return take(OpCode.AND_EQ, 1);
+             if (ch2 == '&')
+                 return take(OpCode.LOG_AND, 1);
+             return OpCode.AND;
+         // |  |=  ||
+         case '|':
+             if (ch2 == '=')
+                 return take(OpCode.OR_EQ, 1);
+             if (ch2 == '|')
+                 return take(OpCode.LOG_OR, 1);
+             return OpCode.OR;
+         // -  -=  --
+         case '-':
+             if (ch2 == '=')
+                 return take(OpCode.MINUS_EQ, 1);
+             if (ch2 == '-')
+                 return take(OpCode.MINUS_MINUS, 1);
+             return OpCode.MINUS;
+         // +  +=  ++
+         case '+':
+             if (ch2 == '=')
+                 return take(OpCode.PLUS_EQ, 1);
+             if (ch2 == '+')
+                 return take(OpCode.PLUS_PLUS, 1);
+             return OpCode.PLUS;
+         // <  <=  <<  <<=  <>  <>=
+         case '<':
+             if (ch2 == '<')
+                 return ch3 == '=' ? take(OpCode.SHL_EQ, 2) : take(OpCode.SHL, 1);
+             if (ch2 == '>')
+                 return ch3 == '=' ? take(OpCode.NE_EQ, 2) : take(OpCode.LT_GT, 1);
+             if (ch2 == '=')
+                 return take(OpCode.LT_EQ, 1);
+             return OpCode.LT;
+         // >  >=  >>  >>=  >>>  >>>=
+         case '>':
+             if (ch2 == '>') {
+                 if (ch3 == '>')
+                     return peek(2) == '=' ? take(OpCode.ASR_EQ, 3) : take(OpCode.ASR, 2);
+                 if (ch3 == '=')
+                     return take(OpCode.SHR_EQ, 2);
+                 return take(OpCode.SHR, 1);
+             }
+             if (ch2 == '=')
+                 return take(OpCode.GT_EQ, 1);
+             return OpCode.GT;
+         // !  !=  !<  !<=  !<>  !<>=  !>  !>=
+         case '!':
+             if (ch2 == '<') {
+                 if (ch3 == '>')
+                     return peek(2) == '=' ? take(OpCode.NOT_LT_GT_EQ, 3) : take(OpCode.NOT_LT_GT, 2);
+                 if (ch3 == '=')
+                     return take(OpCode.NOT_LT_EQ, 2);
+                 return take(OpCode.NOT_LT, 1);
+             }
+             if (ch2 == '>') // previously not handled: "!>" was split into NOT followed by GT
+                 return ch3 == '=' ? take(OpCode.NOT_GT_EQ, 2) : take(OpCode.NOT_GT, 1);
+             if (ch2 == '=')
+                 return take(OpCode.NOT_EQ, 1);
+             return OpCode.NOT;
+         // single character operators
+         case '(': return OpCode.PAR_OPEN;
+         case ')': return OpCode.PAR_CLOSE;
+         case '[': return OpCode.SQ_OPEN;
+         case ']': return OpCode.SQ_CLOSE;
+         case '{': return OpCode.CURL_OPEN;
+         case '}': return OpCode.CURL_CLOSE;
+         case '?': return OpCode.QUEST;
+         case ',': return OpCode.COMMA;
+         case ';': return OpCode.SEMICOLON;
+         case ':': return OpCode.COLON;
+         case '$': return OpCode.DOLLAR;
+         case '@': return OpCode.AT;
+         case '#': return OpCode.SHARP;
+         // =  ==  =>
+         case '=':
+             if (ch2 == '=')
+                 return take(OpCode.QE_EQ, 1);
+             if (ch2 == '>')
+                 return take(OpCode.EQ_GT, 1);
+             return OpCode.EQ;
+         // *  *=
+         case '*':
+             return ch2 == '=' ? take(OpCode.MUL_EQ, 1) : OpCode.MUL;
+         // %  %=
+         case '%':
+             return ch2 == '=' ? take(OpCode.MOD_EQ, 1) : OpCode.MOD;
+         // ^  ^=  ^^  ^^=
+         case '^':
+             if (ch2 == '^')
+                 return ch3 == '=' ? take(OpCode.LOG_XOR_EQ, 2) : take(OpCode.LOG_XOR, 1);
+             if (ch2 == '=')
+                 return take(OpCode.XOR_EQ, 1);
+             return OpCode.XOR;
+         // ~  ~=
+         case '~':
+             return ch2 == '=' ? take(OpCode.INV_EQ, 1) : OpCode.INV;
+         default:
+             return OpCode.NONE;
+     }
+ }
+
+ /**
+     Reads a double quoted ("..."), wysiwyg (r"...") or alternate wysiwyg (`...`) string literal.
+
+     The literal may span several lines; its raw text (without the quotes) is collected into
+     the shared string literal token. Escape sequences are not decoded yet.
+
+     Params: delimiter = character which started the literal: '"', 'r' (for r"...") or '`'
+     Returns: the shared string literal token filled with the text and the optional postfix (c, w or d)
+ */
+ Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
+     bool wysiwyg = (delimiter == 'r' || delimiter == '`');
+     _sharedStringLiteralToken.setPos(_line, _pos - 1);
+     _stringLiteralAppender.reset();
+     if (delimiter == 'r') {
+         // skip the opening quote which follows 'r'; the literal is closed by a quote as well
+         _pos++;
+         delimiter = '\"';
+     }
+     for (;;) {
+         uint endPos = uint.max;
+         for (uint i = _pos; i < _len; i++) {
+             dchar c = _lineText[i];
+             if (c == '\\' && !wysiwyg) {
+                 // backslash escapes the next character (only in double quoted strings),
+                 // so neither \" nor the second backslash of \\ can terminate or escape anything
+                 i++;
+                 continue;
+             }
+             if (c == delimiter) {
+                 endPos = i;
+                 break;
+             }
+         }
+         if (endPos != uint.max) {
+             // found end quote
+             _stringLiteralAppender.append(_lineText[_pos .. endPos]);
+             _pos = endPos + 1;
+             break;
+         }
+         // no quote by end of line: literal continues on the next line
+         _stringLiteralAppender.append(_lineText[_pos .. $]);
+         _stringLiteralAppender.appendEol();
+         if (!nextLine()) {
+             // do we need to throw exception if eof comes before end of string?
+             break;
+         }
+     }
+     // optional string postfix: c, w or d
+     dchar type = 0;
+     if (_pos < _len) {
+         dchar ch = _lineText[_pos];
+         if (ch == 'c' || ch == 'w' || ch == 'd') {
+             type = ch;
+             _pos++; // postfix is a part of the literal, do not leave it for the next token
+         } else if (isIdentMiddleChar(ch))
+             parserError("Unexpected character after string literal");
+     }
+     // TODO: process escape sequences (only when !wysiwyg; wysiwyg strings are taken as is)
+     _sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
+     return _sharedStringLiteralToken;
+ }
+
+ SysTime buildTime;
+
+ /// Text used for the date of compilation special token; the intended format is "mmm dd yyyy".
+ dstring formatBuildDate() {
+     // TODO: provide proper format (for now buildTime is converted with its default formatting)
+     return buildTime.to!dstring;
+ }
+
+ /// Text used for the time of compilation special token; the intended format is "hh:mm:ss".
+ dstring formatBuildTime() {
+     // TODO: provide proper format (for now buildTime is converted with its default formatting)
+     return buildTime.to!dstring;
+ }
+
+ /// Text used for the compilation timestamp special token; the intended format is "www mmm dd hh:mm:ss yyyy".
+ dstring formatBuildTimestamp() {
+     // TODO: provide proper format (for now buildTime is converted with its default formatting)
+     return buildTime.to!dstring;
+ }
+
+ static immutable dstring VERSION = "0.1";
+ static immutable dstring VENDOR = "coolreader.org";
+
+ /// Puts str into the shared string literal token, positioned at column pos of the current line, and returns that token.
+ Token makeSpecialTokenString(dstring str, uint pos) {
+     auto literal = _sharedStringLiteralToken;
+     literal.setPos(_line, pos);
+     // the cast removes immutability of str as required by setText; 0 means "no string postfix"
+     literal.setText(cast(dchar[])str, 0);
+     return literal;
+ }
+
+ /**
+     Replaces a special token keyword with the string literal token it stands for.
+
+     Params:
+         keyword = one of Keyword.DATE, TIME, TIMESTAMP, VENDOR, VERSION_
+         pos = column of the keyword in the current line
+     Returns: shared string literal token with the replacement text; for any other keyword
+              parserError() is called and null is returned if it gives control back
+ */
+ Token processSpecialToken(Keyword keyword, uint pos) {
+     dstring replacement;
+     switch (keyword) {
+         case Keyword.DATE: // date of compilation "mmm dd yyyy"
+             replacement = formatBuildDate();
+             break;
+         case Keyword.TIME: // time of compilation "hh:mm:ss"
+             replacement = formatBuildTime();
+             break;
+         case Keyword.TIMESTAMP: // date and time of compilation "www mmm dd hh:mm:ss yyyy"
+             replacement = formatBuildTimestamp();
+             break;
+         case Keyword.VENDOR: // compiler vendor string, such as "Digital Mars D"
+             replacement = VENDOR;
+             break;
+         case Keyword.VERSION_: // compiler version
+             replacement = VERSION;
+             break;
+         default:
+             parserError("Unexpected token");
+             return null;
+     }
+     return makeSpecialTokenString(replacement, pos);
+ }
+
+ /**
+     Reads the next token from the source.
+
+     Clone the returned token if you want to store it for future usage, otherwise it may be
+     overwritten by further nextToken() calls.
+
+     Returns: the next token; null if the current character does not start any known token
+ */
+ public Token nextToken() {
+     dchar ch = nextChar();
+     if (ch == EOF_CHAR) {
+         return emitEof();
+     }
+     if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
+         // white space (treat EOL as whitespace, too)
+         return processWhiteSpace(ch);
+     }
+     // character which follows ch in the current line, 0 at the end of line
+     dchar next = _pos < _len ? _lineText[_pos] : 0;
+     if (ch == '/') {
+         if (next == '/')
+             return processOneLineComment();
+         else if (next == '*')
+             return processMultilineComment();
+         else if (next == '+')
+             return processNestedComment();
+         // otherwise it is a division operator, detected below
+     }
+     if (ch == '\"')
+         return processDoubleQuotedOrWysiwygString(ch);
+     if (ch == 'x' && next == '\"')
+         return processHexString();
+     if (ch == 'q' && next == '\"')
+         return processDelimitedString();
+     if ((ch == 'r' && next == '\"') || (ch == '`'))
+         return processDoubleQuotedOrWysiwygString(ch);
+     // position of ch itself (nextChar() has already moved _pos past it)
+     uint oldPos = _pos - 1;
+
+     if (ch == '0') {
+         if (next == 'b' || next == 'B')
+             return processBinaryNumber();
+         if (next == 'x' || next == 'X')
+             return processHexNumber();
+         if (next >= '0' && next <= '9')
+             return processOctNumber();
+         // any other continuation of a leading zero is handled as a decimal number below
+     }
+     if (ch >= '0' && ch <= '9')
+         return processDecNumber(ch);
+     if (ch == '.' && next >= '0' && next <= '9') // .123
+         return processDecFloatSecondPart(0);
+
+     if (ch == '_' || isUniversalAlpha(ch)) {
+         // start of identifier or keyword?
+         Keyword keyword = detectKeyword(ch);
+         if (keyword != Keyword.NONE) {
+             switch (keyword) {
+                 //Special Token    Replaced with
+                 case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file
+                 case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
+                 case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
+                 case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
+                 case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
+                 case Keyword.VERSION_: // Compiler version as an integer, such as 2001
+                     return processSpecialToken(keyword, oldPos);
+                 default:
+                     _sharedKeywordToken.setPos(_line, oldPos);
+                     _sharedKeywordToken.keyword = keyword;
+                     return _sharedKeywordToken;
+             }
+         }
+         return processIdent();
+     }
+     OpCode op = detectOp(ch);
+     if (op != OpCode.NONE) {
+         _sharedOpToken.setPos(_line, oldPos);
+         _sharedOpToken.opCode = op;
+         return _sharedOpToken;
+     }
+     // unknown character
+     return null;
+ }
+
+
+}
+
+/// Tokenizer tests: source code is tokenized and each token is compared with an expectation object.
+unittest {
+    import std.algorithm;
+    /// Base class of expectations; remembers the place in the test source where it was created.
+    class TokenTest {
+        uint _line;
+        string _file;
+        this(string file, uint line) {
+            _file = file;
+            _line = line;
+        }
+        /// returns true if token matches the expectation; the base implementation accepts any token
+        bool doTest(Token token) {
+            return true;
+        }
+        /// reads next token from tokenizer and fails the assertion if it is missing or does not match
+        void execute(Tokenizer tokenizer) {
+            Token token = tokenizer.nextToken();
+            // nextToken() may return null: report it as a mismatch instead of dereferencing null
+            if (token is null || !doTest(token)) {
+                string found = token is null ? "null" : token.toString;
+                assert(false, " token does not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ found ~ " expected: " ~ toString);
+            }
+        }
+        public override @property string toString() {
+            return "TokenTest";
+        }
+    }
+    /// tokenizes code and checks the produced tokens against the expectations one by one
+    void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
+        Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
+        foreach (expected; tokens)
+            expected.execute(tokenizer);
+    }
+    /// expects keyword with specified code
+    class KeywordTest : TokenTest {
+        Keyword _code;
+        this(Keyword code, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _code = code;
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.KEYWORD && token.keyword == _code;
+        }
+        public override @property string toString() {
+            return "Keyword:" ~ to!string(_code);
+        }
+    }
+    /// expects operator with specified code
+    class OpTest : TokenTest {
+        OpCode _code;
+        this(OpCode code, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _code = code;
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.OP && token.opCode == _code;
+        }
+        public override @property string toString() {
+            return "Op:" ~ to!string(_code);
+        }
+    }
+    /// expects string literal with specified text
+    class StringTest : TokenTest {
+        string _value;
+        this(string value, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _value = value;
+        }
+        override bool doTest(Token token) {
+            // the comparison used to be inverted here: matching strings were rejected
+            return token.type == TokenType.STRING && to!string(token.text).equal(_value);
+        }
+        public override @property string toString() {
+            return "String:" ~ _value;
+        }
+    }
+    /// expects integer literal with specified value and unsigned / long flags
+    class IntegerTest : TokenTest {
+        ulong _value;
+        bool _unsigned;
+        bool _long;
+        this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _value = value;
+            _unsigned = unsignedFlag;
+            _long = longFlag;
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.INTEGER
+                && token.intValue == _value
+                && token.isUnsigned == _unsigned
+                && token.isLong == _long;
+        }
+        public override @property string toString() {
+            return "Integer:" ~ to!string(_value);
+        }
+    }
+    /// expects floating point literal with specified value, precision and imaginary flag
+    class RealTest : TokenTest {
+        real _value;
+        ubyte _precision;
+        bool _imaginary;
+        this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _value = value;
+            _precision = precision;
+            _imaginary = imaginary;
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.FLOAT
+                && token.realValue == _value
+                && token.precision == _precision
+                && token.isImaginary == _imaginary;
+        }
+        public override @property string toString() {
+            return "Real:" ~ to!string(_value);
+        }
+    }
+    /// expects identifier with specified name
+    class IdentTest : TokenTest {
+        string _value;
+        this(string value, string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+            _value = value;
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.IDENTIFIER && to!string(token.text).equal(_value);
+        }
+        public override @property string toString() {
+            return "Ident:" ~ _value;
+        }
+    }
+    /// expects any comment
+    class CommentTest : TokenTest {
+        this(string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.COMMENT;
+        }
+        public override @property string toString() {
+            return "Comment";
+        }
+    }
+    /// expects end of file
+    class EOFTest : TokenTest {
+        this(string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.EOF;
+        }
+        public override @property string toString() {
+            return "EOF";
+        }
+    }
+    /// expects any white space
+    class WhiteSpaceTest : TokenTest {
+        this(string file = __FILE__, uint line = __LINE__) {
+            super(file, line);
+        }
+        override bool doTest(Token token) {
+            return token.type == TokenType.WHITESPACE;
+        }
+        public override @property string toString() {
+            return "whiteSpace";
+        }
+    }
+    // factory helpers: capture file and line of the call site for error messages
+    TokenTest checkString(string value, string file = __FILE__, uint line = __LINE__) {
+        return new StringTest(value, file, line);
+    }
+    TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
+        return new IntegerTest(value, unsignedFlag, longFlag, file, line);
+    }
+    // NOTE(review): default precision is 0 here while RealTest's own default is 1 - confirm which one is intended
+    TokenTest checkReal(real value, ubyte precision = 0, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
+        return new RealTest(value, precision, imaginary, file, line);
+    }
+    TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
+        return new IdentTest(value, file, line);
+    }
+    TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
+        return new KeywordTest(value, file, line);
+    }
+    TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
+        return new OpTest(value, file, line);
+    }
+    TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
+        return new WhiteSpaceTest(file, line);
+    }
+    TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
+        return new CommentTest(file, line);
+    }
+    TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
+        return new EOFTest(file, line);
+    }
+
+    testTokenizer(q"TEST
+int i;
+TEST"
+        , [
+            checkKeyword(Keyword.INT),
+            checkSpace(),
+            checkIdent("i"),
+            checkOp(OpCode.SEMICOLON),
+            checkEOF()
+        ]);
+    testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25"
+        , [
+            checkInteger(13),
+            checkSpace(),
+            checkInteger(0x123abcd, true, false),
+            checkSpace(),
+            checkInteger(0xabc, false, true),
+            checkSpace(),
+            checkInteger(std.conv.octal!743),
+            checkSpace(),
+            checkInteger(192_837_465),
+            checkSpace(),
+            checkInteger(0),
+            checkSpace(),
+            checkInteger(192837465),
+            checkSpace(),
+            checkReal(5.25),
+            checkEOF()
+        ]);
+}
+
+/// Manual smoke test: tokenizes a file from a fixed location and prints every token.
+/// Exceptions (for example when the file cannot be opened) are only printed.
+unittest {
+    import std.stdio;
+    import std.conv;
+    import std.utf;
+    // NOTE(review): the rest of the lexer imports ddc.lexer.LineStream - confirm that module name "ddx" is intended
+    import ddx.lexer.LineStream;
+    string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
+    writeln("opening file");
+    try {
+        std.stream.File stream = new std.stream.File(fname);
+        scope(exit) { stream.close(); }
+        try {
+            LineStream lines = LineStream.create(stream, fname);
+            Tokenizer tokenizer = new Tokenizer(lines);
+            bool done = false;
+            while (!done) {
+                Token current = tokenizer.nextToken();
+                if (current is null) {
+                    writeln("Null token returned");
+                    done = true;
+                } else if (current.type == TokenType.EOF) {
+                    writeln("EOF token");
+                    done = true;
+                } else {
+                    writeln("", current.line, ":", current.pos, "\t", current.toString);
+                }
+            }
+        } catch (Exception ex) {
+            writeln("Exception " ~ ex.toString);
+        }
+    } catch (Exception ex) {
+        writeln("Exception " ~ ex.toString);
+    }
+}
diff --git a/src/ddc/lexer/exceptions.d b/src/ddc/lexer/exceptions.d
new file mode 100644
index 0000000..1934d3c
--- /dev/null
+++ b/src/ddc/lexer/exceptions.d
@@ -0,0 +1,32 @@
+module ddc.lexer.exceptions;
+
+import std.conv;
+
+/// Base class of parser errors: keeps the bare message and the source position it refers to.
+class ParserException : Exception {
+    string _msg;
+    string _filename;
+    size_t _line;
+    size_t _pos;
+
+    /// line of the source position passed to the constructor
+    public @property size_t line() {
+        return _line;
+    }
+
+    /// The exception message is built as "<msg> at <filename> line <line> column <pos>".
+    this(string msg, string filename, size_t line, size_t pos) {
+        super(text(msg, " at ", filename, " line ", line, " column ", pos));
+        _pos = pos;
+        _line = line;
+        _filename = filename;
+        _msg = msg;
+    }
+}
+
+/// Error reported by the lexer; adds nothing to ParserException except the type.
+class LexerException : ParserException {
+    this(string msg, string filename, size_t line, size_t pos) { super(msg, filename, line, pos); }
+}
+
+/// Lexer error related to the encoding of the source text; adds nothing to LexerException except the type.
+class SourceEncodingException : LexerException {
+    this(string msg, string filename, size_t line, size_t pos) { super(msg, filename, line, pos); }
+}
diff --git a/src/ddc/lexer/textsource.d b/src/ddc/lexer/textsource.d
new file mode 100644
index 0000000..060ea40
--- /dev/null
+++ b/src/ddc/lexer/textsource.d
@@ -0,0 +1,103 @@
+module ddc.lexer.textsource;
+
+private import std.utf;
+private import std.array;
+
+/**
+* Source file information.
+* Even while it contains only the file name, prefer passing this object instead of a string:
+* an object reference is half the size of an array reference (pointer plus length).
+*/
+class SourceFile {
+    protected string _file;
+
+    public this(string filename) {
+        _file = filename;
+    }
+
+    /// name of the file as passed to the constructor
+    public @property string filename() {
+        return _file;
+    }
+}
+
+/// Source of text lines for the tokenizer.
+/// Lines are fetched one by one with readLine(); the error* properties describe a read failure.
+interface SourceLines {
+ /// source file the lines belong to
+ @property SourceFile file();
+ /// last read line
+ @property uint line();
+ /// source encoding
+ //@property EncodingType encoding() { return _encoding; }
+ /// error code
+ @property int errorCode();
+ /// error message
+ @property string errorMessage();
+ /// error line
+ @property int errorLine();
+ /// error position
+ @property int errorPos();
+
+ /// read line, return null if EOF reached or error occurred
+ dchar[] readLine();
+}
+
+/// SourceLines implementation which serves lines from an in-memory array.
+class ArraySourceLines : SourceLines {
+    protected SourceFile _file;
+    protected uint _line;
+    protected uint _firstLine;
+    protected dstring[] _lines;
+    /// returned by readLine() for a null line, which must stay distinguishable from EOF
+    // NOTE(review): verify that ""d.dup really yields a non-null array here
+    static protected dchar[] _emptyLine = ""d.dup;
+
+    /// creates an empty source; use init() to provide the lines
+    this() {
+    }
+
+    /// creates source from already split lines
+    this(dstring[] lines, SourceFile file, uint firstLine = 0) {
+        init(lines, file, firstLine);
+    }
+
+    /// creates source from UTF-8 text, splitting it into lines by "\n"
+    this(string code, string filename) {
+        _file = new SourceFile(filename);
+        _lines = code.toUTF32.split("\n");
+    }
+
+    /// forgets lines and file, resets read position
+    void close() {
+        _file = null;
+        _firstLine = 0;
+        _line = 0;
+        _lines = null;
+    }
+
+    /// replaces the content; reading restarts from the first line of the array
+    void init(dstring[] lines, SourceFile file, uint firstLine = 0) {
+        _file = file;
+        _lines = lines;
+        _line = 0;
+        _firstLine = firstLine;
+    }
+
+    /// sets index of the line which will be returned by the next readLine(); always succeeds
+    bool reset(int line) {
+        _line = line;
+        return true;
+    }
+
+    /// source file
+    override @property SourceFile file() {
+        return _file;
+    }
+    /// last read line
+    override @property uint line() {
+        return _line;
+    }
+    /// source encoding
+    //@property EncodingType encoding() { return _encoding; }
+    /// error code: always 0, reading from array cannot fail
+    override @property int errorCode() {
+        return 0;
+    }
+    /// error message: always empty
+    override @property string errorMessage() {
+        return "";
+    }
+    /// error line: always 0
+    override @property int errorLine() {
+        return 0;
+    }
+    /// error position: always 0
+    override @property int errorPos() {
+        return 0;
+    }
+
+    /// read line, return null if EOF reached
+    override dchar[] readLine() {
+        if (_line >= _lines.length)
+            return null; // EOF
+        dstring current = _lines[_line++];
+        // a null line is replaced with the shared empty line
+        return current ? cast(dchar[])current : _emptyLine;
+    }
+}
diff --git a/src/dlangide/ui/frame.d b/src/dlangide/ui/frame.d
index 59f4ef9..b19d936 100644
--- a/src/dlangide/ui/frame.d
+++ b/src/dlangide/ui/frame.d
@@ -17,8 +17,84 @@ import dlangide.ui.wspanel;
import dlangide.workspace.workspace;
import dlangide.workspace.project;
+import ddc.lexer.textsource;
+import ddc.lexer.exceptions;
+import ddc.lexer.Tokenizer;
+
import std.conv;
import std.utf;
+import std.algorithm;
+
+/// Syntax highlighter for D sources: tokenizes the text and marks comments, keywords, identifiers and strings.
+class SimpleDSyntaxHighlighter : SyntaxHighlighter {
+
+    SourceFile _file;
+    ArraySourceLines _lines;
+    Tokenizer _tokenizer;
+    this (string filename) {
+        _file = new SourceFile(filename);
+        _lines = new ArraySourceLines();
+        _tokenizer = new Tokenizer(_lines);
+    }
+
+    /// properties being filled; set only for the duration of updateHighlight()
+    TokenPropString[] _props;
+
+    /**
+        Categorize characters in content by token types.
+
+        Params:
+            lines = text of the document, one entry per line
+            props = per character categories to fill, one entry per line
+            changeStartLine = first changed line (currently ignored, see below)
+            changeEndLine = last changed line (currently ignored, see below)
+    */
+    void updateHighlight(dstring[] lines, TokenPropString[] props, int changeStartLine, int changeEndLine) {
+        _props = props;
+        // incremental update is not implemented: the whole text is always rescanned
+        changeStartLine = 0;
+        changeEndLine = cast(int)lines.length;
+        _lines.init(lines[changeStartLine..$], _file, changeStartLine);
+        _tokenizer.init(_lines);
+        // start position and category of the previous token: a token extends up to the start of the next one
+        uint tokenPos = 0;
+        uint tokenLine = 0;
+        ubyte category = 0;
+        for (;;) {
+            Token token = _tokenizer.nextToken();
+            if (token is null) {
+                //writeln("Null token returned");
+                break;
+            }
+            if (token.type == TokenType.EOF) {
+                //writeln("EOF token");
+                break;
+            }
+            uint newPos = token.pos;
+            uint newLine = token.line;
+
+            if (category) {
+                // fill the range occupied by the previous token with its category
+                // NOTE(review): line and column are used as 1-based here (index - 1) - confirm against Token.line / Token.pos
+                uint firstRow = tokenLine - 1;
+                uint lastRow = newLine - 1;
+                for (uint i = firstRow; i <= lastRow; i++) {
+                    uint start = i > firstRow ? 0 : tokenPos;
+                    // on the last row the token ends where the new token starts
+                    // (tokenPos was used here before, which left single line tokens unmarked)
+                    uint end = i < lastRow ? cast(uint)lines[i].length : newPos;
+                    for (uint j = start; j < end; j++) {
+                        assert(i < _props.length);
+                        if (j - 1 < _props[i].length)
+                            _props[i][j - 1] = category;
+                    }
+                }
+            }
+
+            // remember category of the current token; it is applied when the next token is read
+            TokenType t = token.type;
+            if (t == TokenType.COMMENT) {
+                category = TokenCategory.Comment;
+            } else if (t == TokenType.KEYWORD) {
+                category = TokenCategory.Keyword;
+            } else if (t == TokenType.IDENTIFIER) {
+                category = TokenCategory.Identifier;
+            } else if (t == TokenType.STRING) {
+                category = TokenCategory.String;
+            } else {
+                category = 0;
+            }
+            tokenPos = newPos;
+            tokenLine = newLine;
+
+        }
+        _lines.close();
+        _props = null;
+    }
+}
/// DIDE source file editor
class DSourceEdit : SourceEdit {
@@ -26,6 +102,10 @@ class DSourceEdit : SourceEdit {
super(ID);
styleId = null;
backgroundColor = 0xFFFFFF;
+ setTokenHightlightColor(TokenCategory.Comment, 0x808080); // gray
+ setTokenHightlightColor(TokenCategory.Keyword, 0x0020C0); // blue
+ setTokenHightlightColor(TokenCategory.String, 0xC02000); // red
+ setTokenHightlightColor(TokenCategory.Identifier, 0x206000); // green
}
this() {
this("SRCEDIT");
@@ -34,8 +114,20 @@ class DSourceEdit : SourceEdit {
@property ProjectSourceFile projectSourceFile() { return _projectSourceFile; }
/// load by filename
override bool load(string fn) {
- return super.load(fn);
+ // loading by plain file name drops the link to a project item
+ _projectSourceFile = null;
+ bool res = super.load(fn);
+ // pick the syntax highlighter by the file name extension
+ setHighlighter();
+ return res;
}
+
+ /// Installs the D syntax highlighter if the file name has one of D source extensions, removes the highlighter otherwise.
+ void setHighlighter() {
+     bool isDSource = filename.endsWith(".d")
+         || filename.endsWith(".dd")
+         || filename.endsWith(".dh")
+         || filename.endsWith(".ddoc");
+     if (!isDSource) {
+         content.syntaxHighlighter = null;
+         return;
+     }
+     content.syntaxHighlighter = new SimpleDSyntaxHighlighter(filename);
+ }
+
/// load by project item
bool load(ProjectSourceFile f) {
if (!load(f.filename)) {
@@ -43,6 +135,7 @@ class DSourceEdit : SourceEdit {
return false;
}
_projectSourceFile = f;
+ setHighlighter();
return true;
}
}