diff --git a/build.sh b/build.sh
index 02774dd..1aac63a 100755
--- a/build.sh
+++ b/build.sh
@@ -1,4 +1,4 @@
 #dmd *.d std/d/*.d -release -inline -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline
 #dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -unittest
-#ldc2 -O3 *.d std/d/*.d -of=dscanner -release -vectorize -m64
-ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
+ldc2 -O2 *.d std/d/*.d -of=dscanner -release -vectorize -m64
+#ldc2 *.d std/d/*.d -of=dscanner -unittest -m64 -g
diff --git a/std/d/lexer.d b/std/d/lexer.d
index b9664c4..addb4af 100644
--- a/std/d/lexer.d
+++ b/std/d/lexer.d
@@ -1,110 +1,110 @@
 // Written in the D programming language
 /**
- * This module contains a range-based _lexer for the D programming language.
- *
- * For performance reasons the _lexer contained in this module operates only on
- * ASCII and UTF-8 encoded source code. If the use of other encodings is
- * desired, the source code must be converted to UTF-8 before passing it to this
- * _lexer.
- *
- * To use the _lexer, create a LexerConfig struct
- * ---
- * LexerConfig config;
- * config.iterStyle = IterationStyle.everything;
- * config.tokenStyle = IterationStyle.source;
- * config.versionNumber = 2061;
- * config.vendorString = "Lexer Example";
- * ---
- * Once you have configured the _lexer, call byToken$(LPAREN)$(RPAREN) on your
- * source code, passing in the configuration.
- * ---
- * auto source = "import std.stdio;"c;
- * auto tokens = byToken(source, config);
- * ---
- * The result of byToken$(LPAREN)$(RPAREN) is a forward range of tokens that can
- * be used easily with the algorithms from std.algorithm or iterated over with
- * $(D_KEYWORD foreach)
- * ---
- * assert (tokens.front.type == TokenType.import_);
- * assert (tokens.front.value == "import");
- * assert (tokens.front.line == 1);
- * assert (tokens.front.startIndex == 0);
- * ---
- *
- * Examples:
- *
- * Generate HTML markup of D code.
- * ---
- * module highlighter;
- *
- * import std.stdio;
- * import std.array;
- * import std.d.lexer;
- *
- * void writeSpan(string cssClass, string value)
- * {
- * stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
- * }
- *
- *
- * // http://ethanschoonover.com/solarized
- * void highlight(R)(R tokens)
- * {
- * stdout.writeln(q"[
- *
- *
- *
- *
- *
- *
- * ]");
- *
- * foreach (Token t; tokens)
- * {
- * if (isType(t.type))
- * writeSpan("type", t.value);
- * else if (isKeyword(t.type))
- * writeSpan("kwrd", t.value);
- * else if (t.type == TokenType.comment)
- * writeSpan("com", t.value);
- * else if (isStringLiteral(t.type))
- * writeSpan("str", t.value);
- * else if (isNumberLiteral(t.type))
- * writeSpan("num", t.value);
- * else if (isOperator(t.type))
- * writeSpan("op", t.value);
- * else
- * stdout.write(t.value.replace("<", "<"));
- * }
- * stdout.writeln("\n");
- * }
- *
- * void main(string[] args)
- * {
- * LexerConfig config;
- * config.tokenStyle = TokenStyle.source;
- * config.iterStyle = IterationStyle.everything;
- * config.fileName = args[1];
- * auto f = File(args[1]);
- * (cast(ubyte[]) f.byLine(KeepTerminator.yes).join()).byToken(config).highlight();
- * }
- * ---
- *
- * Copyright: Brian Schott 2013
- * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
- * Authors: Brian Schott
- * Source: $(PHOBOSSRC std/d/_lexer.d)
- */
+* This module contains a range-based _lexer for the D programming language.
+*
+* For performance reasons the _lexer contained in this module operates only on
+* ASCII and UTF-8 encoded source code. If the use of other encodings is
+* desired, the source code must be converted to UTF-8 before passing it to this
+* _lexer.
+*
+* To use the _lexer, create a LexerConfig struct
+* ---
+* LexerConfig config;
+* config.iterStyle = IterationStyle.everything;
+* config.tokenStyle = TokenStyle.source;
+* config.versionNumber = 2061;
+* config.vendorString = "Lexer Example";
+* ---
+* Once you have configured the _lexer, call byToken$(LPAREN)$(RPAREN) on your
+* source code, passing in the configuration.
+* ---
+* auto source = "import std.stdio;"c;
+* auto tokens = byToken(source, config);
+* ---
+* The result of byToken$(LPAREN)$(RPAREN) is a forward range of tokens that can
+* be used easily with the algorithms from std.algorithm or iterated over with
+* $(D_KEYWORD foreach)
+* ---
+* assert (tokens.front.type == TokenType.import_);
+* assert (tokens.front.value == "import");
+* assert (tokens.front.line == 1);
+* assert (tokens.front.startIndex == 0);
+* ---
+*
+* Examples:
+*
+* Generate HTML markup of D code.
+* ---
+* module highlighter;
+*
+* import std.stdio;
+* import std.array;
+* import std.d.lexer;
+*
+* void writeSpan(string cssClass, string value)
+* {
+* stdout.write(`<span class="`, cssClass, `">`, value.replace("&", "&amp;").replace("<", "&lt;"), `</span>`);
+* }
+*
+*
+* // http://ethanschoonover.com/solarized
+* void highlight(R)(R tokens)
+* {
+* stdout.writeln(q"[
+*
+*
+*
+*
+*
+*
+* ]");
+*
+* foreach (Token t; tokens)
+* {
+* if (isType(t.type))
+* writeSpan("type", t.value);
+* else if (isKeyword(t.type))
+* writeSpan("kwrd", t.value);
+* else if (t.type == TokenType.comment)
+* writeSpan("com", t.value);
+* else if (isStringLiteral(t.type))
+* writeSpan("str", t.value);
+* else if (isNumberLiteral(t.type))
+* writeSpan("num", t.value);
+* else if (isOperator(t.type))
+* writeSpan("op", t.value);
+* else
+* stdout.write(t.value.replace("<", "&lt;"));
+* }
+* stdout.writeln("\n");
+* }
+*
+* void main(string[] args)
+* {
+* LexerConfig config;
+* config.tokenStyle = TokenStyle.source;
+* config.iterStyle = IterationStyle.everything;
+* config.fileName = args[1];
+* auto f = File(args[1]);
+* (cast(ubyte[]) f.byLine(KeepTerminator.yes).join()).byToken(config).highlight();
+* }
+* ---
+*
+* Copyright: Brian Schott 2013
+* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt Boost, License 1.0)
+* Authors: Brian Schott
+* Source: $(PHOBOSSRC std/d/_lexer.d)
+*/
module std.d.lexer;
@@ -123,844 +123,847 @@ import std.utf;
public:
/**
- * Represents a D token
- */
+* Represents a D token
+*/
struct Token
{
- /**
- * The token type.
- */
- TokenType type;
+ /**
+ * The token type.
+ */
+ TokenType type;
- /**
- * The representation of the token in the original source code.
- */
- string value;
+ /**
+ * The representation of the token in the original source code.
+ */
+ string value;
- /**
- * The number of the line the token is on.
- */
- uint line;
+ /**
+ * The number of the line the token is on.
+ */
+ uint line;
- /**
- * The column number of the start of the token in the original source.
- * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
- */
- uint column;
+ /**
+ * The column number of the start of the token in the original source.
+ * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
+ */
+ uint column;
- /**
- * The index of the start of the token in the original source.
- * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
- */
- size_t startIndex;
+ /**
+ * The index of the start of the token in the original source.
+ * $(LPAREN)measured in ASCII characters or UTF-8 code units$(RPAREN)
+ */
+ size_t startIndex;
- /**
- * Check to see if the token is of the same type and has the same string
- * representation as the given token.
- */
- bool opEquals(ref const(Token) other) const
- {
- return other.type == type && other.value == value;
- }
+ /**
+ * Check to see if the token is of the same type and has the same string
+ * representation as the given token.
+ */
+ bool opEquals(ref const(Token) other) const
+ {
+ return other.type == type && other.value == value;
+ }
- /**
- * Checks to see if the token's string representation is equal to the given
- * string.
- */
- bool opEquals(string value) const { return this.value == value; }
+ /**
+ * Checks to see if the token's string representation is equal to the given
+ * string.
+ */
+ bool opEquals(string value) const { return this.value == value; }
- /**
- * Checks to see if the token is of the given type.
- */
- bool opEquals(TokenType type) const { return type == type; }
+ /**
+ * Checks to see if the token is of the given type.
+ */
+ bool opEquals(TokenType type) const { return this.type == type; }
- /**
- * Comparison operator orders tokens by start index.
- */
- int opCmp(size_t i) const
- {
- if (startIndex < i) return -1;
- if (startIndex > i) return 1;
- return 0;
- }
+ /**
+ * Comparison operator orders tokens by start index.
+ */
+ int opCmp(size_t i) const
+ {
+ if (startIndex < i) return -1;
+ if (startIndex > i) return 1;
+ return 0;
+ }
}
/**
- * Configure the behavior of the byToken() function. These flags may be
- * combined using a bitwise or.
- */
+* Configure the behavior of the byToken() function. These flags may be
+* combined using a bitwise or.
+*/
enum IterationStyle
{
- /// Only include code, not whitespace or comments
- codeOnly = 0,
- /// Includes comments
- includeComments = 0b0001,
- /// Includes whitespace
- includeWhitespace = 0b0010,
- /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
- includeSpecialTokens = 0b0100,
- /// Do not stop iteration on reaching the ___EOF__ token
- ignoreEOF = 0b1000,
- /// Include everything
- everything = includeComments | includeWhitespace | ignoreEOF
+ /// Only include code, not whitespace or comments
+ codeOnly = 0,
+ /// Includes comments
+ includeComments = 0b0001,
+ /// Includes whitespace
+ includeWhitespace = 0b0010,
+ /// Include $(LINK2 http://dlang.org/lex.html#specialtokens, special tokens)
+ includeSpecialTokens = 0b0100,
+ /// Do not stop iteration on reaching the ___EOF__ token
+ ignoreEOF = 0b1000,
+ /// Include everything
+ everything = includeComments | includeWhitespace | ignoreEOF
}
/**
- * Configuration of the token lexing style. These flags may be combined with a
- * bitwise or.
- */
+* Configuration of the token lexing style. These flags may be combined with a
+* bitwise or.
+*/
enum TokenStyle : uint
{
- /**
- * Escape sequences will be replaced with their equivalent characters,
- * enclosing quote characters will not be included. Special tokens such as
- * __VENDOR__ will be replaced with their equivalent strings. Useful for
- * creating a compiler or interpreter.
- */
- default_ = 0b0000,
+ /**
+ * Escape sequences will be replaced with their equivalent characters,
+ * enclosing quote characters will not be included. Special tokens such as
+ * __VENDOR__ will be replaced with their equivalent strings. Useful for
+ * creating a compiler or interpreter.
+ */
+ default_ = 0b0000,
- /**
- * Escape sequences will not be processed. An escaped quote character will
- * not terminate string lexing, but it will not be replaced with the quote
- * character in the token.
- */
- notEscaped = 0b0001,
+ /**
+ * Escape sequences will not be processed. An escaped quote character will
+ * not terminate string lexing, but it will not be replaced with the quote
+ * character in the token.
+ */
+ notEscaped = 0b0001,
- /**
- * Strings will include their opening and closing quote characters as well
- * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
- * include the $(D_STRING 'w') character as well as the opening and closing
- * quotes$(RPAREN)
- */
- includeQuotes = 0b0010,
+ /**
+ * Strings will include their opening and closing quote characters as well
+ * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
+ * include the $(D_STRING 'w') character as well as the opening and closing
+ * quotes$(RPAREN)
+ */
+ includeQuotes = 0b0010,
- /**
- * Do not replace the value field of the special tokens such as ___DATE__
- * with their string equivalents.
- */
- doNotReplaceSpecial = 0b0100,
+ /**
+ * Do not replace the value field of the special tokens such as ___DATE__
+ * with their string equivalents.
+ */
+ doNotReplaceSpecial = 0b0100,
- /**
- * Strings will be read exactly as they appeared in the source, including
- * their opening and closing quote characters. Useful for syntax
- * highlighting.
- */
- source = notEscaped | includeQuotes | doNotReplaceSpecial
+ /**
+ * Strings will be read exactly as they appeared in the source, including
+ * their opening and closing quote characters. Useful for syntax
+ * highlighting.
+ */
+ source = notEscaped | includeQuotes | doNotReplaceSpecial
}
/**
- * Lexer configuration
- */
+* Lexer configuration
+*/
struct LexerConfig
{
- /**
- * Iteration style
- */
- IterationStyle iterStyle = IterationStyle.codeOnly;
+ /**
+ * Iteration style
+ */
+ IterationStyle iterStyle = IterationStyle.codeOnly;
- /**
- * Token style
- */
- TokenStyle tokenStyle = tokenStyle.default_;
+ /**
+ * Token style
+ */
+ TokenStyle tokenStyle = TokenStyle.default_;
- /**
- * Replacement for the ___VERSION__ token. Defaults to 1.
- */
- uint versionNumber = 100;
+ /**
+ * Replacement for the ___VERSION__ token. Defaults to 100.
+ */
+ uint versionNumber = 100;
- /**
- * Replacement for the ___VENDOR__ token. Defaults to $(D_STRING "std.d.lexer")
- */
- string vendorString = "std.d.lexer";
+ /**
+ * Replacement for the ___VENDOR__ token. Defaults to $(D_STRING "std.d.lexer")
+ */
+ string vendorString = "std.d.lexer";
- /**
- * Name used when creating error messages that are sent to errorFunc. This
- * is needed because the lexer operates on any forwarad range of ASCII
- * characters or UTF-8 code units and does not know what to call its input
- * source. Defaults to the empty string.
- */
- string fileName = "";
+ /**
+ * Name used when creating error messages that are sent to errorFunc. This
+ * is needed because the lexer operates on any forward range of ASCII
+ * characters or UTF-8 code units and does not know what to call its input
+ * source. Defaults to the empty string.
+ */
+ string fileName = "";
- /**
- * This function is called when an error is encountered during lexing.
- * Parameters are file name, code uint index, line number, column,
- * and error messsage.
- */
- void delegate(string, size_t, uint, uint, string) errorFunc;
+ /**
+ * This function is called when an error is encountered during lexing.
+ * Parameters are file name, code unit index, line number, column,
+ * and error message.
+ */
+ void delegate(string, size_t, uint, uint, string) errorFunc;
- /**
- * Initial size of the lexer's internal token buffer in bytes. The lexer
- * will grow this buffer if necessary.
- */
- size_t bufferSize = 1024 * 4;
+ /**
+ * Initial size of the lexer's internal token buffer in bytes. The lexer
+ * will grow this buffer if necessary.
+ */
+ size_t bufferSize = 1024 * 4;
}
/**
- * Iterate over the given range of characters by D tokens.
- * Params:
- * range = the range of characters
- * config = the lexer configuration
- * Returns:
- * an input range of tokens
- */
+* Iterate over the given range of characters by D tokens.
+* Params:
+* range = the range of characters
+* config = the lexer configuration
+* Returns:
+* an input range of tokens
+*/
TokenRange!(R) byToken(R)(R range, LexerConfig config) if (isForwardRange!(R))
{
- auto r = TokenRange!(R)(range);
- r.config = config;
- r.lineNumber = 1;
- r.popFront();
- return r;
+ auto r = TokenRange!(R)(range);
+ r.config = config;
+ r.lineNumber = 1;
+ r.popFront();
+ return r;
}
/**
- * Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
- */
+* Range of tokens. Use byToken$(LPAREN)$(RPAREN) to instantiate.
+*/
struct TokenRange(R) if (isForwardRange!(R))
{
- /**
- * Returns: true if the range is empty
- */
- bool empty() const @property
- {
- return _empty;
- }
+ /**
+ * Returns: true if the range is empty
+ */
+ bool empty() const @property
+ {
+ return _empty;
+ }
- /**
- * Returns: the current token
- */
- ref const(Token) front() const @property
- {
- enforce(!_empty, "Cannot call front() on empty token range");
- return current;
- }
+ /**
+ * Returns: the current token
+ */
+ ref const(Token) front() const @property
+ {
+ enforce(!_empty, "Cannot call front() on empty token range");
+ return current;
+ }
- /**
- * Returns the current token and then removes it from the range
- */
- Token moveFront()
- {
- auto r = front();
- popFront();
- return r;
- }
+ /**
+ * Returns the current token and then removes it from the range
+ */
+ Token moveFront()
+ {
+ auto r = front();
+ popFront();
+ return r;
+ }
- /**
- * Range operation
- */
- int opApply(int delegate(Token) dg)
- {
- int result = 0;
- while (!empty)
- {
- result = dg(front);
- if (result)
- break;
- popFront();
- }
- return result;
- }
+ /**
+ * Range operation
+ */
+ int opApply(int delegate(Token) dg)
+ {
+ int result = 0;
+ while (!empty)
+ {
+ result = dg(front);
+ if (result)
+ break;
+ popFront();
+ }
+ return result;
+ }
- /**
- * Range operation
- */
- int opApply(int delegate(size_t, Token) dg)
- {
- int result = 0;
- int i = 0;
- while (!empty)
- {
- result = dg(i, front);
- if (result)
- break;
- popFront();
- }
- return result;
- }
+ /**
+ * Range operation
+ */
+ int opApply(int delegate(size_t, Token) dg)
+ {
+ int result = 0;
+ int i = 0;
+ while (!empty)
+ {
+ result = dg(i, front);
+ if (result)
+ break;
+ popFront();
+ }
+ return result;
+ }
- /**
- * Removes the current token from the range
- */
- void popFront()
- {
- // Filter out tokens we don't care about
- loop: do
- {
- advance();
- switch (current.type)
- {
- case TokenType.whitespace:
- if (config.iterStyle & IterationStyle.includeWhitespace)
- break loop;
- break;
- case TokenType.comment:
- if (config.iterStyle & IterationStyle.includeComments)
- break loop;
- break;
- case TokenType.specialTokenSequence:
- if (config.iterStyle & IterationStyle.includeSpecialTokens)
- break loop;
- break;
- default:
- break loop;
- }
- }
- while (!empty());
- }
+ /**
+ * Removes the current token from the range
+ */
+ void popFront()
+ {
+ // Filter out tokens we don't care about
+ loop: do
+ {
+ advance();
+ switch (current.type)
+ {
+ case TokenType.whitespace:
+ if (config.iterStyle & IterationStyle.includeWhitespace)
+ break loop;
+ break;
+ case TokenType.comment:
+ if (config.iterStyle & IterationStyle.includeComments)
+ break loop;
+ break;
+ case TokenType.specialTokenSequence:
+ if (config.iterStyle & IterationStyle.includeSpecialTokens)
+ break loop;
+ break;
+ default:
+ break loop;
+ }
+ }
+ while (!empty());
+ }
private:
- this(ref R range)
- {
- this.range = range;
- buffer = uninitializedArray!(ubyte[])(bufferSize);
- }
+ this(ref R range)
+ {
+ this.range = range;
+ buffer = uninitializedArray!(ubyte[])(bufferSize);
+ }
- /*
- * Advances the range to the next token
- */
- void advance()
- {
- if (isEoF())
- {
- _empty = true;
- return;
- }
+ /*
+ * Advances the range to the next token
+ */
+ void advance()
+ {
+ if (isEoF())
+ {
+ _empty = true;
+ return;
+ }
- bufferIndex = 0;
- current.line = lineNumber;
- current.startIndex = index;
- current.column = column;
- current.value = null;
+ bufferIndex = 0;
+ current.line = lineNumber;
+ current.startIndex = index;
+ current.column = column;
+ current.value = null;
- if (isWhite())
- {
- lexWhitespace();
- return;
- }
+ if (isWhite())
+ {
+ if (config.iterStyle & IterationStyle.includeWhitespace)
+ lexWhitespace!true();
+ else
+ lexWhitespace!false();
+ return;
+ }
- switch (currentElement())
- {
+ switch (currentElement())
+ {
// pragma(msg, generateCaseTrie(
- mixin(generateCaseTrie(
- "=", "TokenType.assign",
- "@", "TokenType.at",
- "&", "TokenType.bitAnd",
- "&=", "TokenType.bitAndEquals",
- "|", "TokenType.bitOr",
- "|=", "TokenType.bitOrEquals",
- "~=", "TokenType.catEquals",
- ":", "TokenType.colon",
- ",", "TokenType.comma",
- "--", "TokenType.decrement",
- "$", "TokenType.dollar",
- "==", "TokenType.equals",
- "=>", "TokenType.goesTo",
- ">", "TokenType.greater",
- ">=", "TokenType.greaterEqual",
- "++", "TokenType.increment",
- "{", "TokenType.lBrace",
- "[", "TokenType.lBracket",
- "<", "TokenType.less",
- "<=", "TokenType.lessEqual",
- "<>=", "TokenType.lessEqualGreater",
- "<>", "TokenType.lessOrGreater",
- "&&", "TokenType.logicAnd",
- "||", "TokenType.logicOr",
- "(", "TokenType.lParen",
- "-", "TokenType.minus",
- "-=", "TokenType.minusEquals",
- "%", "TokenType.mod",
- "%=", "TokenType.modEquals",
- "*=", "TokenType.mulEquals",
- "!", "TokenType.not",
- "!=", "TokenType.notEquals",
- "!>", "TokenType.notGreater",
- "!>=", "TokenType.notGreaterEqual",
- "!<", "TokenType.notLess",
- "!<=", "TokenType.notLessEqual",
- "!<>", "TokenType.notLessEqualGreater",
- "+", "TokenType.plus",
- "+=", "TokenType.plusEquals",
- "^^", "TokenType.pow",
- "^^=", "TokenType.powEquals",
- "}", "TokenType.rBrace",
- "]", "TokenType.rBracket",
- ")", "TokenType.rParen",
- ";", "TokenType.semicolon",
- "<<", "TokenType.shiftLeft",
- "<<=", "TokenType.shiftLeftEqual",
- ">>", "TokenType.shiftRight",
- ">>=", "TokenType.shiftRightEqual",
- "*", "TokenType.star",
- "?", "TokenType.ternary",
- "~", "TokenType.tilde",
- "!<>=", "TokenType.unordered",
- ">>>", "TokenType.unsignedShiftRight",
- ">>>=", "TokenType.unsignedShiftRightEqual",
- "^", "TokenType.xor",
- "^=", "TokenType.xorEquals",
- ));
- case '/':
- keepNonNewlineChar();
- if (isEoF())
- {
- current.type = TokenType.div;
- current.value = "/";
- return;
- }
- switch (currentElement())
- {
- case '/':
- case '*':
- case '+':
- lexComment();
- return;
- case '=':
- current.type = TokenType.divEquals;
- current.value = "/=";
- advanceRange();
- return;
- default:
- current.type = TokenType.div;
- current.value = "/";
- return;
- }
- case '.':
- keepNonNewlineChar();
- if (isEoF())
- {
- current.type = TokenType.dot;
- current.value = getTokenValue(TokenType.dot);
- return;
- }
- switch (currentElement())
- {
- case '0': .. case '9':
- lexNumber();
- return;
- case '.':
- current.type = TokenType.slice;
- keepNonNewlineChar();
- if (currentElement() == '.')
- {
- current.type = TokenType.vararg;
- keepNonNewlineChar();
- }
- current.value = getTokenValue(current.type);
- return;
- default:
- current.type = TokenType.dot;
- current.value = getTokenValue(TokenType.dot);
- return;
- }
- case '0': .. case '9':
- keepNonNewlineChar();
- lexNumber();
- return;
- case '\'':
+ mixin(generateCaseTrie(
+ "=", "TokenType.assign",
+ "@", "TokenType.at",
+ "&", "TokenType.bitAnd",
+ "&=", "TokenType.bitAndEquals",
+ "|", "TokenType.bitOr",
+ "|=", "TokenType.bitOrEquals",
+ "~=", "TokenType.catEquals",
+ ":", "TokenType.colon",
+ ",", "TokenType.comma",
+ "--", "TokenType.decrement",
+ "$", "TokenType.dollar",
+ "==", "TokenType.equals",
+ "=>", "TokenType.goesTo",
+ ">", "TokenType.greater",
+ ">=", "TokenType.greaterEqual",
+ "++", "TokenType.increment",
+ "{", "TokenType.lBrace",
+ "[", "TokenType.lBracket",
+ "<", "TokenType.less",
+ "<=", "TokenType.lessEqual",
+ "<>=", "TokenType.lessEqualGreater",
+ "<>", "TokenType.lessOrGreater",
+ "&&", "TokenType.logicAnd",
+ "||", "TokenType.logicOr",
+ "(", "TokenType.lParen",
+ "-", "TokenType.minus",
+ "-=", "TokenType.minusEquals",
+ "%", "TokenType.mod",
+ "%=", "TokenType.modEquals",
+ "*=", "TokenType.mulEquals",
+ "!", "TokenType.not",
+ "!=", "TokenType.notEquals",
+ "!>", "TokenType.notGreater",
+ "!>=", "TokenType.notGreaterEqual",
+ "!<", "TokenType.notLess",
+ "!<=", "TokenType.notLessEqual",
+ "!<>", "TokenType.notLessEqualGreater",
+ "+", "TokenType.plus",
+ "+=", "TokenType.plusEquals",
+ "^^", "TokenType.pow",
+ "^^=", "TokenType.powEquals",
+ "}", "TokenType.rBrace",
+ "]", "TokenType.rBracket",
+ ")", "TokenType.rParen",
+ ";", "TokenType.semicolon",
+ "<<", "TokenType.shiftLeft",
+ "<<=", "TokenType.shiftLeftEqual",
+ ">>", "TokenType.shiftRight",
+ ">>=", "TokenType.shiftRightEqual",
+ "*", "TokenType.star",
+ "?", "TokenType.ternary",
+ "~", "TokenType.tilde",
+ "!<>=", "TokenType.unordered",
+ ">>>", "TokenType.unsignedShiftRight",
+ ">>>=", "TokenType.unsignedShiftRightEqual",
+ "^", "TokenType.xor",
+ "^=", "TokenType.xorEquals",
+ ));
+ case '/':
+ keepNonNewlineChar();
+ if (isEoF())
+ {
+ current.type = TokenType.div;
+ current.value = "/";
+ return;
+ }
+ switch (currentElement())
+ {
+ case '/':
+ case '*':
+ case '+':
+ lexComment();
+ return;
+ case '=':
+ current.type = TokenType.divEquals;
+ current.value = "/=";
+ advanceRange();
+ return;
+ default:
+ current.type = TokenType.div;
+ current.value = "/";
+ return;
+ }
+ case '.':
+ keepNonNewlineChar();
+ if (isEoF())
+ {
+ current.type = TokenType.dot;
+ current.value = getTokenValue(TokenType.dot);
+ return;
+ }
+ switch (currentElement())
+ {
+ case '0': .. case '9':
+ lexNumber();
+ return;
+ case '.':
+ current.type = TokenType.slice;
+ keepNonNewlineChar();
+ if (currentElement() == '.')
+ {
+ current.type = TokenType.vararg;
+ keepNonNewlineChar();
+ }
+ current.value = getTokenValue(current.type);
+ return;
+ default:
+ current.type = TokenType.dot;
+ current.value = getTokenValue(TokenType.dot);
+ return;
+ }
+ case '0': .. case '9':
+ keepNonNewlineChar();
+ lexNumber();
+ return;
+ case '\'':
lexCharacterLiteral();
return;
- case '"':
- case '`':
- lexString();
- return;
- case 'q':
- keepNonNewlineChar();
- if (isEoF())
- goto default;
- switch (currentElement())
- {
- case '{':
- lexTokenString();
- return;
- case '"':
- lexDelimitedString();
- return;
- default:
- break;
- }
- goto default;
- case 'r':
- keepNonNewlineChar();
- if (isEoF())
- goto default;
- else if (currentElement() == '"')
- {
- lexString();
- return;
- }
- else
- goto default;
- case 'x':
- keepNonNewlineChar();
- if (isEoF())
- goto default;
- else if (currentElement() == '"')
- {
- lexHexString();
- return;
- }
- else
- goto default;
- case '#':
- lexSpecialTokenSequence();
- return;
- default:
- while(!isEoF() && !isSeparating())
- {
- keepNonNewlineChar();
- }
+ case '"':
+ case '`':
+ lexString();
+ return;
+ case 'q':
+ keepNonNewlineChar();
+ if (isEoF())
+ goto default;
+ switch (currentElement())
+ {
+ case '{':
+ lexTokenString();
+ return;
+ case '"':
+ lexDelimitedString();
+ return;
+ default:
+ break;
+ }
+ goto default;
+ case 'r':
+ keepNonNewlineChar();
+ if (isEoF())
+ goto default;
+ else if (currentElement() == '"')
+ {
+ lexString();
+ return;
+ }
+ else
+ goto default;
+ case 'x':
+ keepNonNewlineChar();
+ if (isEoF())
+ goto default;
+ else if (currentElement() == '"')
+ {
+ lexHexString();
+ return;
+ }
+ else
+ goto default;
+ case '#':
+ lexSpecialTokenSequence();
+ return;
+ default:
+ while(!isEoF() && !isSeparating())
+ {
+ keepNonNewlineChar();
+ }
- current.type = lookupTokenType(cast(char[]) buffer[0 .. bufferIndex]);
- current.value = getTokenValue(current.type);
- if (current.value is null)
- setTokenValue();
+ current.type = lookupTokenType(cast(char[]) buffer[0 .. bufferIndex]);
+ current.value = getTokenValue(current.type);
+ if (current.value is null)
+ setTokenValue();
- if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
- {
- _empty = true;
- return;
- }
+ if (!(config.iterStyle & IterationStyle.ignoreEOF) && current.type == TokenType.eof)
+ {
+ _empty = true;
+ return;
+ }
- if (!(config.iterStyle & TokenStyle.doNotReplaceSpecial))
- return;
+ if (config.tokenStyle & TokenStyle.doNotReplaceSpecial)
+ return;
- switch (current.type)
- {
- case TokenType.date:
- current.type = TokenType.stringLiteral;
- auto time = Clock.currTime();
- current.value = format("%s %02d %04d", time.month, time.day, time.year);
- return;
- case TokenType.time:
- auto time = Clock.currTime();
- current.type = TokenType.stringLiteral;
- current.value = (cast(TimeOfDay)(time)).toISOExtString();
- return;
- case TokenType.timestamp:
- auto time = Clock.currTime();
- auto dt = cast(DateTime) time;
- current.type = TokenType.stringLiteral;
- current.value = format("%s %s %02d %02d:%02d:%02d %04d",
- dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
- dt.second, dt.year);
- return;
- case TokenType.vendor:
- current.type = TokenType.stringLiteral;
- current.value = config.vendorString;
- return;
- case TokenType.compilerVersion:
- current.type = TokenType.stringLiteral;
- current.value = format("%d", config.versionNumber);
- return;
- case TokenType.line:
- current.type = TokenType.intLiteral;
- current.value = format("%d", current.line);
- return;
- case TokenType.file:
- current.type = TokenType.stringLiteral;
- current.value = config.fileName;
- return;
- default:
- return;
- }
- }
- }
+ switch (current.type)
+ {
+ case TokenType.date:
+ current.type = TokenType.stringLiteral;
+ auto time = Clock.currTime();
+ current.value = format("%s %02d %04d", time.month, time.day, time.year);
+ return;
+ case TokenType.time:
+ auto time = Clock.currTime();
+ current.type = TokenType.stringLiteral;
+ current.value = (cast(TimeOfDay)(time)).toISOExtString();
+ return;
+ case TokenType.timestamp:
+ auto time = Clock.currTime();
+ auto dt = cast(DateTime) time;
+ current.type = TokenType.stringLiteral;
+ current.value = format("%s %s %02d %02d:%02d:%02d %04d",
+ dt.dayOfWeek, dt.month, dt.day, dt.hour, dt.minute,
+ dt.second, dt.year);
+ return;
+ case TokenType.vendor:
+ current.type = TokenType.stringLiteral;
+ current.value = config.vendorString;
+ return;
+ case TokenType.compilerVersion:
+ current.type = TokenType.stringLiteral;
+ current.value = format("%d", config.versionNumber);
+ return;
+ case TokenType.line:
+ current.type = TokenType.intLiteral;
+ current.value = format("%d", current.line);
+ return;
+ case TokenType.file:
+ current.type = TokenType.stringLiteral;
+ current.value = config.fileName;
+ return;
+ default:
+ return;
+ }
+ }
+ }
- void lexWhitespace()
- {
- current.type = TokenType.whitespace;
- while (!isEoF() && isWhite())
- {
- keepChar();
- }
- if (config.iterStyle & IterationStyle.includeWhitespace)
- setTokenValue();
- }
+ void lexWhitespace(bool keep)()
+ {
+ current.type = TokenType.whitespace;
+ while (!isEoF() && isWhite())
+ {
+ static if (keep) keepChar();
+ else advanceRange();
+ }
+ static if (keep) setTokenValue();
+ }
- void lexComment()
- in
- {
- assert (currentElement() == '/' || currentElement() == '*' || currentElement() == '+');
- }
- body
- {
- current.type = TokenType.comment;
- switch(currentElement())
- {
- case '/':
- while (!isEoF() && !isNewline(currentElement()))
- {
- keepNonNewlineChar();
- }
- break;
- case '*':
- while (!isEoF())
- {
- if (currentElement() == '*')
- {
- keepNonNewlineChar();
- if (currentElement() == '/')
- {
- keepNonNewlineChar();
- break;
- }
- }
- else
- keepChar();
- }
- break;
- case '+':
- int depth = 1;
- while (depth > 0 && !isEoF())
- {
- if (currentElement() == '+')
- {
- keepNonNewlineChar();
- if (currentElement() == '/')
- {
- keepNonNewlineChar();
- --depth;
- }
- }
- else if (currentElement() == '/')
- {
- keepNonNewlineChar();
- if (currentElement() == '+')
- {
- keepNonNewlineChar();
- ++depth;
- }
- }
- else
- keepChar();
- }
- break;
- default:
- assert(false);
- }
- if (config.iterStyle & IterationStyle.includeComments)
- setTokenValue();
- }
+ void lexComment()
+ in
+ {
+ assert (currentElement() == '/' || currentElement() == '*' || currentElement() == '+');
+ }
+ body
+ {
+ current.type = TokenType.comment;
+ switch(currentElement())
+ {
+ case '/':
+ while (!isEoF() && !isNewline(currentElement()))
+ {
+ keepNonNewlineChar();
+ }
+ break;
+ case '*':
+ while (!isEoF())
+ {
+ if (currentElement() == '*')
+ {
+ keepNonNewlineChar();
+ if (currentElement() == '/')
+ {
+ keepNonNewlineChar();
+ break;
+ }
+ }
+ else
+ keepChar();
+ }
+ break;
+ case '+':
+ int depth = 1;
+ while (depth > 0 && !isEoF())
+ {
+ if (currentElement() == '+')
+ {
+ keepNonNewlineChar();
+ if (currentElement() == '/')
+ {
+ keepNonNewlineChar();
+ --depth;
+ }
+ }
+ else if (currentElement() == '/')
+ {
+ keepNonNewlineChar();
+ if (currentElement() == '+')
+ {
+ keepNonNewlineChar();
+ ++depth;
+ }
+ }
+ else
+ keepChar();
+ }
+ break;
+ default:
+ assert(false);
+ }
+ if (config.iterStyle & IterationStyle.includeComments)
+ setTokenValue();
+ }
- void lexHexString()
- in
- {
- assert (currentElement() == '"' && buffer[0] == 'x');
- }
- body
- {
- current.type = TokenType.stringLiteral;
- keepChar();
- while (true)
- {
- if (isEoF())
- {
- errorMessage("Unterminated hex string literal");
- return;
- }
- else if (isHexDigit(currentElement()))
- {
- keepNonNewlineChar();
- }
- else if (isWhite() && (config.tokenStyle & TokenStyle.notEscaped))
- {
- keepChar();
- }
- else if (currentElement() == '"')
- {
- keepNonNewlineChar();
- break;
- }
- else
- {
- errorMessage(format("Invalid character '%s' in hex string literal",
- cast(char) currentElement()));
- return;
- }
- }
- lexStringSuffix();
- if (config.tokenStyle & TokenStyle.notEscaped)
- {
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- setTokenValue(2, bufferIndex - 1);
- }
- else
- {
- auto a = appender!(ubyte[])();
- foreach (b; std.range.chunks(buffer[2 .. bufferIndex - 1], 2))
- {
- string s = to!string(cast(char[]) b);
- a.put(cast(ubyte[]) to!string(cast(dchar) parse!uint(s, 16)));
- }
- current.value = to!string(cast(char[]) a.data);
- }
- }
+ void lexHexString()
+ in
+ {
+ assert (currentElement() == '"' && buffer[0] == 'x');
+ }
+ body
+ {
+ current.type = TokenType.stringLiteral;
+ keepChar();
+ while (true)
+ {
+ if (isEoF())
+ {
+ errorMessage("Unterminated hex string literal");
+ return;
+ }
+ else if (isHexDigit(currentElement()))
+ {
+ keepNonNewlineChar();
+ }
+ else if (isWhite() && (config.tokenStyle & TokenStyle.notEscaped))
+ {
+ keepChar();
+ }
+ else if (currentElement() == '"')
+ {
+ keepNonNewlineChar();
+ break;
+ }
+ else
+ {
+ errorMessage(format("Invalid character '%s' in hex string literal",
+ cast(char) currentElement()));
+ return;
+ }
+ }
+ lexStringSuffix();
+ if (config.tokenStyle & TokenStyle.notEscaped)
+ {
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ setTokenValue(2, bufferIndex - 1);
+ }
+ else
+ {
+ auto a = appender!(ubyte[])();
+ foreach (b; std.range.chunks(buffer[2 .. bufferIndex - 1], 2))
+ {
+ string s = to!string(cast(char[]) b);
+ a.put(cast(ubyte[]) to!string(cast(dchar) parse!uint(s, 16)));
+ }
+ current.value = to!string(cast(char[]) a.data);
+ }
+ }
- void lexNumber()
- in
- {
- assert(buffer[0] || buffer[0] == '.');
- }
- body
- {
- // hex and binary can start with zero, anything else is decimal
- if (currentElement() != '0')
- lexDecimal();
- else
- {
- switch (currentElement())
- {
- case 'x':
- case 'X':
- keepNonNewlineChar();
- lexHex();
- break;
- case 'b':
- case 'B':
- keepNonNewlineChar();
- lexBinary();
- break;
- default:
- lexDecimal();
- return;
- }
- }
- }
+ void lexNumber()
+ in
+ {
+ assert((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.');
+ }
+ body
+ {
+ // hex and binary can start with zero, anything else is decimal
+ if (buffer[0] != '0')
+ lexDecimal();
+ else
+ {
+ switch (currentElement())
+ {
+ case 'x':
+ case 'X':
+ keepNonNewlineChar();
+ lexHex();
+ break;
+ case 'b':
+ case 'B':
+ keepNonNewlineChar();
+ lexBinary();
+ break;
+ default:
+ lexDecimal();
+ return;
+ }
+ }
+ }
- void lexFloatSuffix()
- {
- switch (currentElement())
- {
- case 'L':
- keepNonNewlineChar();
- current.type = TokenType.doubleLiteral;
- break;
- case 'f':
- case 'F':
- keepNonNewlineChar();
- current.type = TokenType.floatLiteral;
- break;
- default:
- break;
- }
- if (!isEoF() && currentElement() == 'i')
- {
- keepNonNewlineChar();
- if (current.type == TokenType.floatLiteral)
- current.type = TokenType.ifloatLiteral;
- else
- current.type = TokenType.idoubleLiteral;
- }
- }
+ void lexFloatSuffix()
+ {
+ switch (currentElement())
+ {
+ case 'L':
+ keepNonNewlineChar();
+ current.type = TokenType.doubleLiteral;
+ break;
+ case 'f':
+ case 'F':
+ keepNonNewlineChar();
+ current.type = TokenType.floatLiteral;
+ break;
+ default:
+ break;
+ }
+ if (!isEoF() && currentElement() == 'i')
+ {
+ keepNonNewlineChar();
+ if (current.type == TokenType.floatLiteral)
+ current.type = TokenType.ifloatLiteral;
+ else
+ current.type = TokenType.idoubleLiteral;
+ }
+ }
- void lexIntSuffix()
- {
- bool foundU;
- bool foundL;
- while (!isEoF())
- {
- switch (currentElement())
- {
- case 'u':
- case 'U':
- if (foundU)
- return;
- switch (current.type)
- {
- case TokenType.intLiteral:
- current.type = TokenType.uintLiteral;
- keepNonNewlineChar();
- break;
- case TokenType.longLiteral:
- current.type = TokenType.ulongLiteral;
- keepNonNewlineChar();
- break;
- default:
- return;
- }
- foundU = true;
- break;
- case 'L':
- if (foundL)
- return;
- switch (current.type)
- {
- case TokenType.intLiteral:
- current.type = TokenType.longLiteral;
- keepNonNewlineChar();
- break;
- case TokenType.uintLiteral:
- current.type = TokenType.ulongLiteral;
- keepNonNewlineChar();
- break;
- default:
- return;
- }
- foundL = true;
- break;
- default:
- return;
- }
- }
- }
+ void lexIntSuffix()
+ {
+ bool foundU;
+ bool foundL;
+ while (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case 'u':
+ case 'U':
+ if (foundU)
+ return;
+ switch (current.type)
+ {
+ case TokenType.intLiteral:
+ current.type = TokenType.uintLiteral;
+ keepNonNewlineChar();
+ break;
+ case TokenType.longLiteral:
+ current.type = TokenType.ulongLiteral;
+ keepNonNewlineChar();
+ break;
+ default:
+ return;
+ }
+ foundU = true;
+ break;
+ case 'L':
+ if (foundL)
+ return;
+ switch (current.type)
+ {
+ case TokenType.intLiteral:
+ current.type = TokenType.longLiteral;
+ keepNonNewlineChar();
+ break;
+ case TokenType.uintLiteral:
+ current.type = TokenType.ulongLiteral;
+ keepNonNewlineChar();
+ break;
+ default:
+ return;
+ }
+ foundL = true;
+ break;
+ default:
+ return;
+ }
+ }
+ }
- void lexExponent()
- in
- {
- assert (currentElement() == 'e' || currentElement() == 'E' || currentElement() == 'p'
- || currentElement() == 'P');
- }
- body
- {
- keepNonNewlineChar();
- bool foundSign = false;
- while (!isEoF())
- {
- switch (currentElement())
- {
- case '-':
- case '+':
- if (foundSign)
- return;
- foundSign = true;
- keepNonNewlineChar();
- case '0': .. case '9':
- case '_':
- keepNonNewlineChar();
- break;
- case 'L':
- case 'f':
- case 'F':
- case 'i':
- lexFloatSuffix();
- return;
- default:
- return;
- }
- }
- }
+ void lexExponent()
+ in
+ {
+ assert (currentElement() == 'e' || currentElement() == 'E' || currentElement() == 'p'
+ || currentElement() == 'P');
+ }
+ body
+ {
+ keepNonNewlineChar();
+ bool foundSign = false;
+ while (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case '-':
+ case '+':
+ if (foundSign)
+ return;
+ foundSign = true;
+ keepNonNewlineChar();
+ case '0': .. case '9':
+ case '_':
+ keepNonNewlineChar();
+ break;
+ case 'L':
+ case 'f':
+ case 'F':
+ case 'i':
+ lexFloatSuffix();
+ return;
+ default:
+ return;
+ }
+ }
+ }
- void lexDecimal()
- in
- {
- assert ((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.');
- }
- body
- {
- bool foundDot = false;
- current.type = TokenType.intLiteral;
- scope(exit) setTokenValue();
- decimalLoop: while (!isEoF())
- {
- switch (currentElement())
- {
- case '0': .. case '9':
- case '_':
- keepNonNewlineChar();
- break;
+ void lexDecimal()
+ in
+ {
+ assert ((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.');
+ }
+ body
+ {
+ bool foundDot = false;
+ current.type = TokenType.intLiteral;
+ scope(exit) setTokenValue();
+ decimalLoop: while (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case '0': .. case '9':
+ case '_':
+ keepNonNewlineChar();
+ break;
case 'u':
case 'U':
if (foundDot)
@@ -971,173 +974,173 @@ private:
}
else
lexIntSuffix();
- case 'i':
- case 'L':
- if (foundDot)
- {
- lexFloatSuffix();
- return;
- }
- else
- {
- lexIntSuffix();
- return;
- }
- case 'f':
- case 'F':
- lexFloatSuffix();
- return;
- case 'e':
- case 'E':
- lexExponent();
- return;
- case '.':
- static if (isArray!R)
- auto r = range[index .. $];
- else
- auto r = range.save();
- r.popFront();
- if (!r.isRangeEoF() && r.front == '.')
- break decimalLoop; // possibly slice expression
- if (foundDot)
- break decimalLoop; // two dots with other characters between them
- keepNonNewlineChar();
- foundDot = true;
- current.type = TokenType.doubleLiteral;
- break;
- default:
- break decimalLoop;
- }
- }
+ case 'i':
+ case 'L':
+ if (foundDot)
+ {
+ lexFloatSuffix();
+ return;
+ }
+ else
+ {
+ lexIntSuffix();
+ return;
+ }
+ case 'f':
+ case 'F':
+ lexFloatSuffix();
+ return;
+ case 'e':
+ case 'E':
+ lexExponent();
+ return;
+ case '.':
+ static if (isArray!R)
+ auto r = range[index .. $];
+ else
+ auto r = range.save();
+ r.popFront();
+ if (!r.isRangeEoF() && r.front == '.')
+ break decimalLoop; // possibly slice expression
+ if (foundDot)
+ break decimalLoop; // two dots with other characters between them
+ keepNonNewlineChar();
+ foundDot = true;
+ current.type = TokenType.doubleLiteral;
+ break;
+ default:
+ break decimalLoop;
+ }
+ }
- }
+ }
- void lexBinary()
- {
- current.type = TokenType.intLiteral;
- scope(exit) setTokenValue();
- binaryLoop: while (!isEoF())
- {
- switch (currentElement())
- {
- case '0':
- case '1':
- case '_':
- keepNonNewlineChar();
- break;
- case 'u':
- case 'U':
- case 'L':
- lexIntSuffix();
- return;
- default:
- break binaryLoop;
- }
- }
- }
+ void lexBinary()
+ {
+ current.type = TokenType.intLiteral;
+ scope(exit) setTokenValue();
+ binaryLoop: while (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case '0':
+ case '1':
+ case '_':
+ keepNonNewlineChar();
+ break;
+ case 'u':
+ case 'U':
+ case 'L':
+ lexIntSuffix();
+ return;
+ default:
+ break binaryLoop;
+ }
+ }
+ }
- void lexHex()
- {
- current.type = TokenType.intLiteral;
- scope(exit) setTokenValue();
- bool foundDot;
- hexLoop: while (!isEoF())
- {
- switch (currentElement())
- {
- case 'a': .. case 'f':
- case 'A': .. case 'F':
- case '0': .. case '9':
- case '_':
- keepNonNewlineChar();
- break;
- case 'i':
- case 'L':
- if (foundDot)
- {
- lexFloatSuffix();
- return;
- }
- else
- {
- lexIntSuffix();
- return;
- }
- case 'p':
- case 'P':
- lexExponent();
- return;
- case '.':
- static if (isArray!R)
- auto r = range[index .. $];
- else
- auto r = range.save();
- r.popFront();
- if (!r.isRangeEoF() && r.front == '.')
- break hexLoop; // slice expression
- if (foundDot)
- break hexLoop; // two dots with other characters between them
- keepNonNewlineChar();
- foundDot = true;
- current.type = TokenType.doubleLiteral;
- break;
- default:
- break hexLoop;
- }
- }
- }
+ void lexHex()
+ {
+ current.type = TokenType.intLiteral;
+ scope(exit) setTokenValue();
+ bool foundDot;
+ hexLoop: while (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case 'a': .. case 'f':
+ case 'A': .. case 'F':
+ case '0': .. case '9':
+ case '_':
+ keepNonNewlineChar();
+ break;
+ case 'i':
+ case 'L':
+ if (foundDot)
+ {
+ lexFloatSuffix();
+ return;
+ }
+ else
+ {
+ lexIntSuffix();
+ return;
+ }
+ case 'p':
+ case 'P':
+ lexExponent();
+ return;
+ case '.':
+ static if (isArray!R)
+ auto r = range[index .. $];
+ else
+ auto r = range.save();
+ r.popFront();
+ if (!r.isRangeEoF() && r.front == '.')
+ break hexLoop; // slice expression
+ if (foundDot)
+ break hexLoop; // two dots with other characters between them
+ keepNonNewlineChar();
+ foundDot = true;
+ current.type = TokenType.doubleLiteral;
+ break;
+ default:
+ break hexLoop;
+ }
+ }
+ }
- void lexStringSuffix()
- {
- current.type = TokenType.stringLiteral;
- if (!isEoF())
- {
- switch (currentElement())
- {
- case 'w':
- current.type = TokenType.wstringLiteral;
- goto case 'c';
- case 'd':
- current.type = TokenType.dstringLiteral;
- goto case 'c';
- case 'c':
- keepNonNewlineChar();
- break;
- default:
- break;
- }
- }
- }
+ void lexStringSuffix()
+ {
+ current.type = TokenType.stringLiteral;
+ if (!isEoF())
+ {
+ switch (currentElement())
+ {
+ case 'w':
+ current.type = TokenType.wstringLiteral;
+ goto case 'c';
+ case 'd':
+ current.type = TokenType.dstringLiteral;
+ goto case 'c';
+ case 'c':
+ keepNonNewlineChar();
+ break;
+ default:
+ break;
+ }
+ }
+ }
- void lexCharacterLiteral()
- in
- {
- assert (currentElement() == '\'');
- }
- body
- {
- current.type = TokenType.characterLiteral;
- scope (exit)
- {
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- setTokenValue(1, bufferIndex - 1);
- }
- keepChar();
- if (isEoF())
- {
- errorMessage("Unterminated character literal");
- return;
- }
- switch (currentElement())
- {
- case '\'':
- return;
- case '\\':
- lexEscapeSequence();
- break;
- default:
+ void lexCharacterLiteral()
+ in
+ {
+ assert (currentElement() == '\'');
+ }
+ body
+ {
+ current.type = TokenType.characterLiteral;
+ scope (exit)
+ {
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ setTokenValue(1, bufferIndex - 1);
+ }
+ keepChar();
+ if (isEoF())
+ {
+ errorMessage("Unterminated character literal");
+ return;
+ }
+ switch (currentElement())
+ {
+ case '\'':
+ return;
+ case '\\':
+ lexEscapeSequence();
+ break;
+ default:
if (currentElement() & 0x80)
{
while (currentElement() & 0x80)
@@ -1149,117 +1152,117 @@ private:
keepChar();
break;
}
- }
- if (currentElement() != '\'')
- {
- errorMessage("Expected \"'\" to end character literal");
- return;
- }
- keepChar();
- }
+ }
+ if (currentElement() != '\'')
+ {
+ errorMessage("Expected \"'\" to end character literal");
+ return;
+ }
+ keepChar();
+ }
- void lexString()
- in
- {
- assert (currentElement() == '"' || currentElement() == '`');
- }
- body
- {
- current.type = TokenType.stringLiteral;
- bool isWysiwyg = buffer[0] == 'r' || currentElement() == '`';
+ void lexString()
+ in
+ {
+ assert (currentElement() == '"' || currentElement() == '`');
+ }
+ body
+ {
+ current.type = TokenType.stringLiteral;
+ bool isWysiwyg = buffer[0] == 'r' || currentElement() == '`';
- scope (exit)
- {
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- {
- if (buffer[0] == 'r')
- setTokenValue(2, bufferIndex - 1);
- else
- setTokenValue(1, bufferIndex - 1);
- }
- }
+ scope (exit)
+ {
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ {
+ if (buffer[0] == 'r')
+ setTokenValue(2, bufferIndex - 1);
+ else
+ setTokenValue(1, bufferIndex - 1);
+ }
+ }
- auto quote = currentElement();
- keepChar();
- while (true)
- {
- if (isEoF())
- {
- errorMessage("Unterminated string literal");
- return;
- }
- else if (currentElement() == '\\')
- {
- if (isWysiwyg)
- keepChar();
- else
- lexEscapeSequence();
- }
- else if (currentElement() == quote)
- {
- keepNonNewlineChar();
- break;
- }
- else
- keepChar();
- }
- lexStringSuffix();
- }
+ auto quote = currentElement();
+ keepChar();
+ while (true)
+ {
+ if (isEoF())
+ {
+ errorMessage("Unterminated string literal");
+ return;
+ }
+ else if (currentElement() == '\\')
+ {
+ if (isWysiwyg)
+ keepChar();
+ else
+ lexEscapeSequence();
+ }
+ else if (currentElement() == quote)
+ {
+ keepNonNewlineChar();
+ break;
+ }
+ else
+ keepChar();
+ }
+ lexStringSuffix();
+ }
- void lexEscapeSequence()
- in
- {
- assert (currentElement() == '\\');
- }
- body
- {
- if (config.tokenStyle & TokenStyle.notEscaped)
- {
- keepChar();
- switch (currentElement())
- {
- case '\'':
- case '"':
- case '?':
- case '\\':
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case 0x0a:
- case 0x00:
- keepChar();
- return;
- case '0': .. case '7':
- foreach(i; 0 .. 3)
- {
- keepChar();
- if (currentElement() < '0' || currentElement() > '7') return;
- }
- return;
- case 'x':
- keepChar();
- foreach(i; 0 .. 4)
- {
- if (!isHexDigit(currentElement()))
- {
- errorMessage("Expected hex digit");
- return;
- }
- keepChar();
- }
- return;
- case 'u':
- case 'U':
+ void lexEscapeSequence()
+ in
+ {
+ assert (currentElement() == '\\');
+ }
+ body
+ {
+ if (config.tokenStyle & TokenStyle.notEscaped)
+ {
+ keepChar();
+ switch (currentElement())
+ {
+ case '\'':
+ case '"':
+ case '?':
+ case '\\':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v':
+ case 0x0a:
+ case 0x00:
+ keepChar();
+ return;
+ case '0': .. case '7':
+ foreach(i; 0 .. 3)
+ {
+ keepChar();
+ if (currentElement() < '0' || currentElement() > '7') return;
+ }
+ return;
+ case 'x':
+ keepChar();
+ foreach(i; 0 .. 4)
+ {
+ if (!isHexDigit(currentElement()))
+ {
+ errorMessage("Expected hex digit");
+ return;
+ }
+ keepChar();
+ }
+ return;
+ case 'u':
+ case 'U':
uint digits = currentElement == 'u' ? 4 : 8;
- keepChar();
- foreach (i; 0 .. digits)
- {
+ keepChar();
+ foreach (i; 0 .. digits)
+ {
if (!isHexDigit(currentElement()))
{
errorMessage("Expected hex digit instead of %s".format(
@@ -1268,7 +1271,7 @@ private:
}
keepChar();
}
- return;
+ return;
case '&':
while (!isEoF())
{
@@ -1277,60 +1280,60 @@ private:
break;
}
return;
- default:
- errorMessage("Invalid escape sequence");
- return;
- }
- }
- else
- {
- advanceRange();
- switch (currentElement())
- {
- case '\'': bufferChar('\''); advanceRange(); return;
- case '"': bufferChar('"'); advanceRange(); return;
- case '?': bufferChar('\?'); advanceRange(); return;
- case '\\': bufferChar('\\'); advanceRange(); return;
- case 'a': bufferChar('\a'); advanceRange(); return;
- case 'b': bufferChar('\b'); advanceRange(); return;
- case 'f': bufferChar('\f'); advanceRange(); return;
- case 'n': bufferChar('\n'); advanceRange(); return;
- case 'r': bufferChar('\r'); advanceRange(); return;
- case 't': bufferChar('\t'); advanceRange(); return;
- case 'v': bufferChar('\v'); advanceRange(); return;
- case 0x0a: bufferChar(0x0a); advanceRange(); return;
- case 0x00: bufferChar(0x00); advanceRange(); return;
- case '0': .. case '7':
- ubyte[3] digits;
- size_t i;
- while(i < 3 && !isEoF())
- {
- digits[i++] = currentElement();
- advanceRange();
- if (currentElement() < '0' || currentElement() > '7') break;
- }
- decodeAndStore(digits, i, 8);
- return;
- case 'x':
- ubyte[2] digits;
- advanceRange();
- foreach(i; 0 .. 2)
- {
- if (!isHexDigit(currentElement()))
- {
- errorMessage("Expected hex digit");
- return;
- }
- digits[i] = currentElement();
+ default:
+ errorMessage("Invalid escape sequence");
+ return;
+ }
+ }
+ else
+ {
+ advanceRange();
+ switch (currentElement())
+ {
+ case '\'': bufferChar('\''); advanceRange(); return;
+ case '"': bufferChar('"'); advanceRange(); return;
+ case '?': bufferChar('\?'); advanceRange(); return;
+ case '\\': bufferChar('\\'); advanceRange(); return;
+ case 'a': bufferChar('\a'); advanceRange(); return;
+ case 'b': bufferChar('\b'); advanceRange(); return;
+ case 'f': bufferChar('\f'); advanceRange(); return;
+ case 'n': bufferChar('\n'); advanceRange(); return;
+ case 'r': bufferChar('\r'); advanceRange(); return;
+ case 't': bufferChar('\t'); advanceRange(); return;
+ case 'v': bufferChar('\v'); advanceRange(); return;
+ case 0x0a: bufferChar(0x0a); advanceRange(); return;
+ case 0x00: bufferChar(0x00); advanceRange(); return;
+ case '0': .. case '7':
+ ubyte[3] digits;
+ size_t i;
+ while(i < 3 && !isEoF())
+ {
+ digits[i++] = currentElement();
advanceRange();
- }
- decodeAndStore(digits, 2, 16);
- return;
- case 'u':
- case 'U':
+ if (currentElement() < '0' || currentElement() > '7') break;
+ }
+ decodeAndStore(digits, i, 8);
+ return;
+ case 'x':
+ ubyte[2] digits;
+ advanceRange();
+ foreach(i; 0 .. 2)
+ {
+ if (!isHexDigit(currentElement()))
+ {
+ errorMessage("Expected hex digit");
+ return;
+ }
+ digits[i] = currentElement();
+ advanceRange();
+ }
+ decodeAndStore(digits, 2, 16);
+ return;
+ case 'u':
+ case 'U':
uint digitCount = currentElement == 'u' ? 4 : 8;
- advanceRange();
- ubyte[8] digits;
+ advanceRange();
+ ubyte[8] digits;
foreach (i; 0 .. digitCount)
{
if (!isHexDigit(currentElement()))
@@ -1341,8 +1344,8 @@ private:
digits[i] = currentElement();
advanceRange();
}
- decodeAndStore(digits, digitCount, 16);
- return;
+ decodeAndStore(digits, digitCount, 16);
+ return;
case '&':
advanceRange();
ubyte[] b;
@@ -1377,742 +1380,742 @@ private:
bufferChar(cast(ubyte) (*entity)[i]);
}
return;
- default:
- errorMessage("Invalid escape sequence");
- return;
- }
- }
- }
+ default:
+ errorMessage("Invalid escape sequence");
+ return;
+ }
+ }
+ }
- void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base)
- {
+ void decodeAndStore(ubyte[] digits, size_t maxIndex, uint base)
+ {
scope(failure)
{
import std.stdio;
stderr.writeln("Failed on line ", lineNumber, " of file ",
config.fileName);
}
- char[4] codeUnits;
- auto source = cast(char[]) digits[0 .. maxIndex];
- uint codePoint = parse!uint(source, base);
- ulong unitCount = encode(codeUnits, codePoint);
- foreach (i; 0 .. unitCount)
- bufferChar(codeUnits[i]);
- }
+ char[4] codeUnits;
+ auto source = cast(char[]) digits[0 .. maxIndex];
+ uint codePoint = parse!uint(source, base);
+ ulong unitCount = encode(codeUnits, codePoint);
+ foreach (i; 0 .. unitCount)
+ bufferChar(codeUnits[i]);
+ }
- void lexDelimitedString()
- in
- {
- assert(currentElement() == '"');
- }
- body
- {
- current.type = TokenType.stringLiteral;
+ void lexDelimitedString()
+ in
+ {
+ assert(currentElement() == '"');
+ }
+ body
+ {
+ current.type = TokenType.stringLiteral;
- keepChar();
+ keepChar();
- bool heredoc;
- ubyte open;
- ubyte close;
+ bool heredoc;
+ ubyte open;
+ ubyte close;
- switch (currentElement())
- {
- case '[': open = '['; close = ']'; break;
- case '{': open = '{'; close = '}'; break;
- case '(': open = '('; close = ')'; break;
- case '<': open = '<'; close = '>'; break;
- default: heredoc = true; break;
- }
- if (heredoc)
- lexHeredocString();
- else
- lexNormalDelimitedString(open, close);
- }
+ switch (currentElement())
+ {
+ case '[': open = '['; close = ']'; break;
+ case '{': open = '{'; close = '}'; break;
+ case '(': open = '('; close = ')'; break;
+ case '<': open = '<'; close = '>'; break;
+ default: heredoc = true; break;
+ }
+ if (heredoc)
+ lexHeredocString();
+ else
+ lexNormalDelimitedString(open, close);
+ }
- void lexNormalDelimitedString(ubyte open, ubyte close)
- in
- {
- assert(buffer[0 .. 2] == `q"`);
- }
- body
- {
- current.type = TokenType.stringLiteral;
- int depth = 1;
- keepChar();
- scope (exit)
- {
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- setTokenValue(3, bufferIndex - 2);
- }
- while (true)
- {
- if (isEoF())
- errorMessage("Unterminated string literal");
- if (currentElement() == open)
- {
- keepChar();
- ++depth;
- }
- else if (currentElement() == close)
- {
- keepChar();
- --depth;
- if (depth <= 0)
- {
- static if (isArray!R)
- auto r = range[index .. $];
- else
- auto r = range.save();
- if (r.front == '"')
- {
- keepChar();
- return;
- }
- else
- {
- errorMessage("Expected \" after balanced "
- ~ cast(char) close ~ " but found "
- ~ cast(char) r.front ~ " instead.");
- return;
- }
- }
- }
- else
- keepChar();
- }
+ void lexNormalDelimitedString(ubyte open, ubyte close)
+ in
+ {
+ assert(buffer[0 .. 2] == `q"`);
+ }
+ body
+ {
+ current.type = TokenType.stringLiteral;
+ int depth = 1;
+ keepChar();
+ scope (exit)
+ {
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ setTokenValue(3, bufferIndex - 2);
+ }
+ while (true)
+ {
+ if (isEoF())
+ errorMessage("Unterminated string literal");
+ if (currentElement() == open)
+ {
+ keepChar();
+ ++depth;
+ }
+ else if (currentElement() == close)
+ {
+ keepChar();
+ --depth;
+ if (depth <= 0)
+ {
+ static if (isArray!R)
+ auto r = range[index .. $];
+ else
+ auto r = range.save();
+ if (r.front == '"')
+ {
+ keepChar();
+ return;
+ }
+ else
+ {
+ errorMessage("Expected \" after balanced "
+ ~ cast(char) close ~ " but found "
+ ~ cast(char) r.front ~ " instead.");
+ return;
+ }
+ }
+ }
+ else
+ keepChar();
+ }
- }
+ }
- void lexHeredocString()
- in
- {
- assert (buffer[0 .. bufferIndex] == "q\"");
- }
- body
- {
- auto i = bufferIndex;
- while (true)
- {
- if (isEoF())
- {
- errorMessage("Unterminated string literal");
- return;
- }
- else if (isNewline(currentElement()))
- {
- keepChar();
- break;
- }
- else if (isSeparating())
- {
- errorMessage("Unterminated string literal - Separating");
- return;
- }
- else
- keepChar();
- }
- auto ident = buffer[i .. bufferIndex - 1];
+ void lexHeredocString()
+ in
+ {
+ assert (buffer[0 .. bufferIndex] == "q\"");
+ }
+ body
+ {
+ auto i = bufferIndex;
+ while (true)
+ {
+ if (isEoF())
+ {
+ errorMessage("Unterminated string literal");
+ return;
+ }
+ else if (isNewline(currentElement()))
+ {
+ keepChar();
+ break;
+ }
+ else if (isSeparating())
+ {
+ errorMessage("Unterminated string literal - Separating");
+ return;
+ }
+ else
+ keepChar();
+ }
+ auto ident = buffer[i .. bufferIndex - 1];
- scope(exit)
- {
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- {
- size_t b = 2 + ident.length;
- if (buffer[b] == '\r') ++b;
- if (buffer[b] == '\n') ++b;
- size_t e = bufferIndex;
- if (buffer[e - 1] == 'c' || buffer[e - 1] == 'd' || buffer[e - 1] == 'w')
- --e;
- setTokenValue(b, e);
- }
- }
+ scope(exit)
+ {
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ {
+ size_t b = 2 + ident.length;
+ if (buffer[b] == '\r') ++b;
+ if (buffer[b] == '\n') ++b;
+ size_t e = bufferIndex;
+ if (buffer[e - 1] == 'c' || buffer[e - 1] == 'd' || buffer[e - 1] == 'w')
+ --e;
+ setTokenValue(b, e);
+ }
+ }
- while (true)
- {
- if (isEoF())
- {
- errorMessage("Unterminated string literal");
- return;
- }
- else if (buffer[bufferIndex - ident.length .. bufferIndex] == ident)
- {
- if (currentElement() == '"')
- {
- keepChar();
- lexStringSuffix();
- return;
- }
- else
- {
- errorMessage("Unterminated string literal");
- return;
- }
- }
- else
- keepChar();
- }
- }
+ while (true)
+ {
+ if (isEoF())
+ {
+ errorMessage("Unterminated string literal");
+ return;
+ }
+ else if (buffer[bufferIndex - ident.length .. bufferIndex] == ident)
+ {
+ if (currentElement() == '"')
+ {
+ keepChar();
+ lexStringSuffix();
+ return;
+ }
+ else
+ {
+ errorMessage("Unterminated string literal");
+ return;
+ }
+ }
+ else
+ keepChar();
+ }
+ }
- void lexTokenString()
- in
- {
- assert (currentElement() == '{');
- }
- body
- {
- current.type = TokenType.stringLiteral;
- keepChar();
- LexerConfig c = config;
- config.iterStyle = IterationStyle.everything;
- config.tokenStyle = TokenStyle.source;
- size_t bi;
- ubyte[] b = uninitializedArray!(ubyte[])(1024 * 4);
- int depth = 1;
- while (!isEoF())
- {
- advance();
- while (bi + current.value.length >= b.length)
- b.length += 1024 * 4;
- b[bi .. bi + current.value.length] = cast(ubyte[]) current.value;
- bi += current.value.length;
- if (current.type == TokenType.lBrace)
- ++depth;
- else if (current.type == TokenType.rBrace)
- {
- --depth;
- if (depth <= 0)
- break;
- }
- }
- config = c;
- buffer[0] = 'q';
- buffer[1] = '{';
- buffer[2 .. bi + 2] = b[0 .. bi];
- buffer[bi++] = '}';
- bufferIndex = bi;
- if (config.tokenStyle & TokenStyle.includeQuotes)
- setTokenValue();
- else
- setTokenValue(2, bufferIndex - 1);
- lexStringSuffix();
- }
+ void lexTokenString()
+ in
+ {
+ assert (currentElement() == '{');
+ }
+ body
+ {
+ current.type = TokenType.stringLiteral;
+ keepChar();
+ LexerConfig c = config;
+ config.iterStyle = IterationStyle.everything;
+ config.tokenStyle = TokenStyle.source;
+ size_t bi;
+ ubyte[] b = uninitializedArray!(ubyte[])(1024 * 4);
+ int depth = 1;
+ while (!isEoF())
+ {
+ advance();
+ while (bi + current.value.length >= b.length)
+ b.length += 1024 * 4;
+ b[bi .. bi + current.value.length] = cast(ubyte[]) current.value;
+ bi += current.value.length;
+ if (current.type == TokenType.lBrace)
+ ++depth;
+ else if (current.type == TokenType.rBrace)
+ {
+ --depth;
+ if (depth <= 0)
+ break;
+ }
+ }
+ config = c;
+ buffer[0] = 'q';
+ buffer[1] = '{';
+ buffer[2 .. bi + 2] = b[0 .. bi];
+ buffer[bi++] = '}';
+ bufferIndex = bi;
+ if (config.tokenStyle & TokenStyle.includeQuotes)
+ setTokenValue();
+ else
+ setTokenValue(2, bufferIndex - 1);
+ lexStringSuffix();
+ }
- void lexSpecialTokenSequence()
- in
- {
- assert (currentElement() == '#');
- }
- body
- {
- keepChar();
- static if (isArray!R)
- auto r = range[index .. $];
- else
- auto r = range.save();
- auto app = appender!(ubyte[])();
- app.put('#');
- while (true)
- {
- if (r.isRangeEoF())
- {
- errorMessage("Found EOF when interpreting special token sequence");
- return;
- }
- else if (isNewline(r.front))
- break;
- else
- {
- app.put(r.front);
- r.popFront();
- }
- }
- auto m = match((cast(char[]) app.data),
- `#line\s+(?P