diff --git a/autocomplete.d b/autocomplete.d index 7b5e33b..b408fdc 100644 --- a/autocomplete.d +++ b/autocomplete.d @@ -13,11 +13,11 @@ import std.stdio; import std.typecons; import std.path; import std.file; +import std.d.lexer; import parser; import langutils; import types; -import tokenizer; immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", "Cygwin", "D_Coverage", "D_Ddoc", "DigitalMars", "D_InlineAsm_X86", @@ -31,348 +31,348 @@ immutable string[] versions = ["AIX", "all", "Alpha", "ARM", "BigEndian", "BSD", immutable string[] scopes = ["exit", "failure", "success"]; -/** - * Returns: indicies into the token array - */ -size_t findEndOfExpression(const Token[] tokens, const size_t index) -out (result) -{ - assert (result < tokens.length); - assert (result >= index); -} -body -{ - size_t i = index; - loop: while (i < tokens.length) - { - switch (tokens[i].type) - { - case TokenType.Return: - case TokenType.New: - case TokenType.Delete: - case TokenType.Comma: - case TokenType.RBrace: - case TokenType.RParen: - case TokenType.RBracket: - case TokenType.Semicolon: - break loop; - case TokenType.LParen: - skipParens(tokens, i); - break; - case TokenType.LBrace: - skipBraces(tokens, i); - break; - case TokenType.LBracket: - skipBrackets(tokens, i); - break; - default: - ++i; - break; - } - } - return i; -} - -size_t findBeginningOfExpression(const Token[] tokens, const size_t index) -in -{ - assert (index < tokens.length); - assert (tokens.length > 0); -} -out (result) -{ - import std.string; - assert (result < tokens.length); - assert (result <= index, format("findBeginningOfExpression %d, %d", result, index)); -} -body -{ - size_t i = index; - loop: while (i < tokens.length) - { - switch (tokens[i].type) - { - case TokenType.Assign: case TokenType.BitAnd: case TokenType.BitAndEquals: - case TokenType.BitOr: case TokenType.BitOrEquals: case TokenType.CatEquals: - case TokenType.Colon: case TokenType.Comma: case TokenType.Decrement: - 
case TokenType.Div: case TokenType.DivEquals: case TokenType.Dollar: - case TokenType.Equals: case TokenType.GoesTo: - case TokenType.Greater: case TokenType.GreaterEqual: case TokenType.Hash: - case TokenType.Increment: case TokenType.LBrace: case TokenType.LBracket: - case TokenType.Less: case TokenType.LessEqual: case TokenType.LessEqualGreater: - case TokenType.LessOrGreater: case TokenType.LogicAnd: case TokenType.LogicOr: - case TokenType.LParen: case TokenType.Minus: case TokenType.MinusEquals: - case TokenType.Mod: case TokenType.ModEquals: case TokenType.MulEquals: - case TokenType.Not: case TokenType.NotEquals: case TokenType.NotGreater: - case TokenType.NotGreaterEqual: case TokenType.NotLess: case TokenType.NotLessEqual: - case TokenType.NotLessEqualGreater: case TokenType.Plus: case TokenType.PlusEquals: - case TokenType.Pow: case TokenType.PowEquals: case TokenType.RBrace: - case TokenType.Semicolon: case TokenType.ShiftLeft: case TokenType.ShiftLeftEqual: - case TokenType.ShiftRight: case TokenType.ShiftRightEqual: case TokenType.Slice: - case TokenType.Star: case TokenType.Ternary: case TokenType.Tilde: - case TokenType.Unordered: case TokenType.UnsignedShiftRight: case TokenType.UnsignedShiftRightEqual: - case TokenType.Vararg: case TokenType.Xor: case TokenType.XorEquals: - case TokenType.KEYWORDS_BEGIN: .. 
case TokenType.KEYWORDS_END: - return i + 1; - case TokenType.RParen: - if (i == 0) - break loop; - skipParens(tokens, i); - break; - case TokenType.RBracket: - if (i == 0) - break loop; - skipBrackets(tokens, i); - break; - default: - if (i == 0) - break loop; - i--; - break; - } - } - return i + 1; -} - -const(Token)[] splitCallChain(const(Token)[] tokens) -{ - auto app = appender!(Token[])(); - size_t i = 0; - while (i < tokens.length) - { - app.put(tokens[i++]); - while (i < tokens.length && tokens[i] == TokenType.LParen) skipParens(tokens, i); - while (i < tokens.length && tokens[i] == TokenType.LBracket) skipBrackets(tokens, i); - while (i < tokens.length && tokens[i] == TokenType.Dot) ++i; - } - return app.data; -} - -unittest -{ - auto code = `a.b[10].c("grcl").x`; - auto tokens = tokenize(code); - assert (splitCallChain(tokens) == ["a", "b", "c", "x"]); -} - -struct AutoComplete -{ - this(const (Token)[] tokens, CompletionContext context) - { - this.tokens = tokens; - this.context = context; - } - - string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor) - { - stderr.writeln("getting type of ", expression); - if (expression.length == 0) - return "void"; - auto type = typeOfVariable(expression[0], cursor); - if (type is null) - return "void"; - size_t index = 1; - while (index < expression.length) - { - const Tuple!(string, string)[string] typeMap = context.getMembersOfType( - type); - const Tuple!(string, string)* memberType = expression[index].value in typeMap; - if (memberType is null) - return "void"; - else - type = (*memberType)[0]; - index++; - } - return type; - } - - string typeOfVariable(Token symbol, size_t cursor) - { - // int is of type int, double of type double, and so on - if (symbol.value in typeProperties) - return symbol.value; - - string tokenType = getTypeFromToken(symbol); - if (tokenType !is null) - return tokenType; - - if (context.getMembersOfType(symbol.value)) - return symbol.value; - - // 
Arbitrarily define the depth of the cursor position as zero - // iterate backwards through the code to try to find the variable - int depth = 0; - auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); - auto index = preceedingTokens.length - 1; - while (true) - { - if (preceedingTokens[index] == TokenType.LBrace) - --depth; - else if (preceedingTokens[index] == TokenType.RBrace) - ++depth; - else if (depth <= 0 && preceedingTokens[index].value == symbol) - { - // Found the symbol, now determine if it was declared here. - auto p = preceedingTokens[index - 1]; - - - if ((p == TokenType.Auto || p == TokenType.Immutable - || p == TokenType.Const) - && preceedingTokens[index + 1] == TokenType.Assign) - { - // Try to determine the type of a variable declared as "auto" - return getTypeOfExpression( - tokens[index + 2 .. findEndOfExpression(tokens, index + 2)], - tokens, cursor); - } - else if (p == TokenType.Identifier - || (p.type > TokenType.TYPES_BEGIN - && p.type < TokenType.TYPES_END)) - { - // Handle simple cases like "int a;" or "Someclass instance;" - return p.value; - } - else if (p == TokenType.RBracket || p == TokenType.RParen) - { - return combineTokens(tokens[findBeginningOfExpression(tokens, index) .. index]); - } - } - if (index == 0) - break; - else - --index; - } - - // Find all struct or class bodies that we're in. 
- // Check for the symbol in those class/struct/interface bodies - // if match is found, return it - auto structs = context.getStructsContaining(cursor); - if (symbol == "this" && structs.length > 0) - { - return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; - } - - foreach (s; structs) - { - auto t = s.getMemberType(symbol.value); - if (t !is null) - return t; - } - return "void"; - } - - string symbolAt(size_t cursor) const - { - auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; - if (r.value.length + r.startIndex > cursor) - return r.value; - else - return null; - } - - string parenComplete(size_t cursor) - { - auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; - Token t = tokens[index]; - switch (tokens[index].type) - { - case TokenType.Version: - return "completions\n" ~ to!string(join(map!`a ~ " k"`(versions), "\n").array()); - case TokenType.Scope: - return "completions\n" ~ to!string(join(map!`a ~ " k"`(scopes), "\n").array()); - case TokenType.If: - case TokenType.Cast: - case TokenType.While: - case TokenType.For: - case TokenType.Foreach: - case TokenType.Switch: - return ""; - default: - size_t startIndex = findBeginningOfExpression(tokens, index); - auto callChain = splitCallChain(tokens[startIndex .. index + 1]); - auto expressionType = getTypeOfExpression( - callChain[0 .. $ - 1], tokens, cursor); - return "calltips\n" ~ to!string(context.getCallTipsFor(expressionType, - callChain[$ - 1].value, cursor).join("\n").array()); - } - } - - string dotComplete(size_t cursor) - { - stderr.writeln("dotComplete"); - auto index = assumeSorted(tokens).lowerBound(cursor).length - 1; - Token t = tokens[index]; - - // If the last character entered before the cursor isn't a dot, give up. 
- // The user was probably in the middle of typing the slice or vararg - // operators - if (t != TokenType.Dot) - return null; - - size_t startIndex = findBeginningOfExpression(tokens, index); - if (startIndex - 1 < tokens.length && tokens[startIndex - 1] == TokenType.Import) - { - return importComplete(splitCallChain(tokens[startIndex .. index])); - } - - auto expressionType = getTypeOfExpression( - splitCallChain(tokens[startIndex .. index]), tokens, cursor); - - stderr.writeln("expression type is ", expressionType); - - // Complete pointers and references the same way - if (expressionType[$ - 1] == '*') - expressionType = expressionType[0 .. $ - 1]; - - const Tuple!(string, string)[string] typeMap = context.getMembersOfType( - expressionType); - if (typeMap is null) - return ""; - auto app = appender!(string[])(); - foreach (k, t; typeMap) - app.put(k ~ " " ~ t[1]); - return to!string(array(join(sort!("a.toLower() < b.toLower()")(app.data), "\n"))); - } - - string importComplete(const(Token)[] tokens) - { - stderr.writeln("importComplete"); - auto app = appender!(string[])(); - string part = to!string(map!"a.value.dup"(tokens).join("/").array()); - foreach (path; context.importDirectories) - { - stderr.writeln("Searching for ", path, "/", part); - if (!exists(buildPath(path, part))) - continue; - stderr.writeln("found it"); - foreach (DirEntry dirEntry; dirEntries(buildPath(path, part), - SpanMode.shallow)) - { - if (dirEntry.isDir) - app.put(baseName(dirEntry.name) ~ " P"); - else if (dirEntry.name.endsWith(".d", ".di")) - app.put(stripExtension(baseName(dirEntry.name)) ~ " M"); - } - } - return to!string(sort!("a.toLower() < b.toLower()")(app.data).join("\n").array()); - } - - const(Token)[] tokens; - CompletionContext context; -} - -unittest -{ - auto code = q{ -struct TestStruct { int a; int b; } -TestStruct ts; -ts.a. 
- }; - - auto tokens = tokenize(code); - auto mod = parseModule(tokens); - auto context = new CompletionContext(mod); - auto completion = AutoComplete(tokens, context); - assert (completion.getTypeOfExpression(splitCallChain(tokens[13 .. 16]), - tokens, 56) == "int"); -} +///** +// * Returns: indicies into the token array +// */ +//size_t findEndOfExpression(const Token[] tokens, const size_t index) +//out (result) +//{ +// assert (result < tokens.length); +// assert (result >= index); +//} +//body +//{ +// size_t i = index; +// loop: while (i < tokens.length) +// { +// switch (tokens[i].type) +// { +// case TokenType.Return: +// case TokenType.New: +// case TokenType.Delete: +// case TokenType.Comma: +// case TokenType.RBrace: +// case TokenType.RParen: +// case TokenType.RBracket: +// case TokenType.Semicolon: +// break loop; +// case TokenType.LParen: +// skipParens(tokens, i); +// break; +// case TokenType.LBrace: +// skipBraces(tokens, i); +// break; +// case TokenType.LBracket: +// skipBrackets(tokens, i); +// break; +// default: +// ++i; +// break; +// } +// } +// return i; +//} +// +//size_t findBeginningOfExpression(const Token[] tokens, const size_t index) +//in +//{ +// assert (index < tokens.length); +// assert (tokens.length > 0); +//} +//out (result) +//{ +// import std.string; +// assert (result < tokens.length); +// assert (result <= index, format("findBeginningOfExpression %d, %d", result, index)); +//} +//body +//{ +// size_t i = index; +// loop: while (i < tokens.length) +// { +// switch (tokens[i].type) +// { +// case TokenType.Assign: case TokenType.BitAnd: case TokenType.BitAndEquals: +// case TokenType.BitOr: case TokenType.BitOrEquals: case TokenType.CatEquals: +// case TokenType.Colon: case TokenType.Comma: case TokenType.Decrement: +// case TokenType.Div: case TokenType.DivEquals: case TokenType.Dollar: +// case TokenType.Equals: case TokenType.GoesTo: +// case TokenType.Greater: case TokenType.GreaterEqual: case TokenType.Hash: +// case 
TokenType.Increment: case TokenType.LBrace: case TokenType.LBracket: +// case TokenType.Less: case TokenType.LessEqual: case TokenType.LessEqualGreater: +// case TokenType.LessOrGreater: case TokenType.LogicAnd: case TokenType.LogicOr: +// case TokenType.LParen: case TokenType.Minus: case TokenType.MinusEquals: +// case TokenType.Mod: case TokenType.ModEquals: case TokenType.MulEquals: +// case TokenType.Not: case TokenType.NotEquals: case TokenType.NotGreater: +// case TokenType.NotGreaterEqual: case TokenType.NotLess: case TokenType.NotLessEqual: +// case TokenType.NotLessEqualGreater: case TokenType.Plus: case TokenType.PlusEquals: +// case TokenType.Pow: case TokenType.PowEquals: case TokenType.RBrace: +// case TokenType.Semicolon: case TokenType.ShiftLeft: case TokenType.ShiftLeftEqual: +// case TokenType.ShiftRight: case TokenType.ShiftRightEqual: case TokenType.Slice: +// case TokenType.Star: case TokenType.Ternary: case TokenType.Tilde: +// case TokenType.Unordered: case TokenType.UnsignedShiftRight: case TokenType.UnsignedShiftRightEqual: +// case TokenType.Vararg: case TokenType.Xor: case TokenType.XorEquals: +// case TokenType.KEYWORDS_BEGIN: .. 
case TokenType.KEYWORDS_END: +// return i + 1; +// case TokenType.RParen: +// if (i == 0) +// break loop; +// skipParens(tokens, i); +// break; +// case TokenType.RBracket: +// if (i == 0) +// break loop; +// skipBrackets(tokens, i); +// break; +// default: +// if (i == 0) +// break loop; +// i--; +// break; +// } +// } +// return i + 1; +//} +// +//const(Token)[] splitCallChain(const(Token)[] tokens) +//{ +// auto app = appender!(Token[])(); +// size_t i = 0; +// while (i < tokens.length) +// { +// app.put(tokens[i++]); +// while (i < tokens.length && tokens[i] == TokenType.LParen) skipParens(tokens, i); +// while (i < tokens.length && tokens[i] == TokenType.LBracket) skipBrackets(tokens, i); +// while (i < tokens.length && tokens[i] == TokenType.Dot) ++i; +// } +// return app.data; +//} +// +//unittest +//{ +// auto code = `a.b[10].c("grcl").x`; +// auto tokens = tokenize(code); +// assert (splitCallChain(tokens) == ["a", "b", "c", "x"]); +//} +// +//struct AutoComplete +//{ +// this(const (Token)[] tokens, CompletionContext context) +// { +// this.tokens = tokens; +// this.context = context; +// } +// +// string getTypeOfExpression(const(Token)[] expression, const Token[] tokens, size_t cursor) +// { +// stderr.writeln("getting type of ", expression); +// if (expression.length == 0) +// return "void"; +// auto type = typeOfVariable(expression[0], cursor); +// if (type is null) +// return "void"; +// size_t index = 1; +// while (index < expression.length) +// { +// const Tuple!(string, string)[string] typeMap = context.getMembersOfType( +// type); +// const Tuple!(string, string)* memberType = expression[index].value in typeMap; +// if (memberType is null) +// return "void"; +// else +// type = (*memberType)[0]; +// index++; +// } +// return type; +// } +// +// string typeOfVariable(Token symbol, size_t cursor) +// { +// // int is of type int, double of type double, and so on +// if (symbol.value in typeProperties) +// return symbol.value; +// +// string 
tokenType = getTypeFromToken(symbol); +// if (tokenType !is null) +// return tokenType; +// +// if (context.getMembersOfType(symbol.value)) +// return symbol.value; +// +// // Arbitrarily define the depth of the cursor position as zero +// // iterate backwards through the code to try to find the variable +// int depth = 0; +// auto preceedingTokens = assumeSorted(tokens).lowerBound(cursor); +// auto index = preceedingTokens.length - 1; +// while (true) +// { +// if (preceedingTokens[index] == TokenType.LBrace) +// --depth; +// else if (preceedingTokens[index] == TokenType.RBrace) +// ++depth; +// else if (depth <= 0 && preceedingTokens[index].value == symbol) +// { +// // Found the symbol, now determine if it was declared here. +// auto p = preceedingTokens[index - 1]; +// +// +// if ((p == TokenType.Auto || p == TokenType.Immutable +// || p == TokenType.Const) +// && preceedingTokens[index + 1] == TokenType.Assign) +// { +// // Try to determine the type of a variable declared as "auto" +// return getTypeOfExpression( +// tokens[index + 2 .. findEndOfExpression(tokens, index + 2)], +// tokens, cursor); +// } +// else if (p == TokenType.Identifier +// || (p.type > TokenType.TYPES_BEGIN +// && p.type < TokenType.TYPES_END)) +// { +// // Handle simple cases like "int a;" or "Someclass instance;" +// return p.value; +// } +// else if (p == TokenType.RBracket || p == TokenType.RParen) +// { +// return combineTokens(tokens[findBeginningOfExpression(tokens, index) .. index]); +// } +// } +// if (index == 0) +// break; +// else +// --index; +// } +// +// // Find all struct or class bodies that we're in. 
+// // Check for the symbol in those class/struct/interface bodies +// // if match is found, return it +// auto structs = context.getStructsContaining(cursor); +// if (symbol == "this" && structs.length > 0) +// { +// return minCount!("a.bodyStart > b.bodyStart")(structs)[0].name; +// } +// +// foreach (s; structs) +// { +// auto t = s.getMemberType(symbol.value); +// if (t !is null) +// return t; +// } +// return "void"; +// } +// +// string symbolAt(size_t cursor) const +// { +// auto r = assumeSorted(tokens).lowerBound(cursor)[$ - 1]; +// if (r.value.length + r.startIndex > cursor) +// return r.value; +// else +// return null; +// } +// +// string parenComplete(size_t cursor) +// { +// auto index = assumeSorted(tokens).lowerBound(cursor).length - 2; +// Token t = tokens[index]; +// switch (tokens[index].type) +// { +// case TokenType.Version: +// return "completions\n" ~ to!string(join(map!`a ~ " k"`(versions), "\n").array()); +// case TokenType.Scope: +// return "completions\n" ~ to!string(join(map!`a ~ " k"`(scopes), "\n").array()); +// case TokenType.If: +// case TokenType.Cast: +// case TokenType.While: +// case TokenType.For: +// case TokenType.Foreach: +// case TokenType.Switch: +// return ""; +// default: +// size_t startIndex = findBeginningOfExpression(tokens, index); +// auto callChain = splitCallChain(tokens[startIndex .. index + 1]); +// auto expressionType = getTypeOfExpression( +// callChain[0 .. $ - 1], tokens, cursor); +// return "calltips\n" ~ to!string(context.getCallTipsFor(expressionType, +// callChain[$ - 1].value, cursor).join("\n").array()); +// } +// } +// +// string dotComplete(size_t cursor) +// { +// stderr.writeln("dotComplete"); +// auto index = assumeSorted(tokens).lowerBound(cursor).length - 1; +// Token t = tokens[index]; +// +// // If the last character entered before the cursor isn't a dot, give up. 
+// // The user was probably in the middle of typing the slice or vararg +// // operators +// if (t != TokenType.Dot) +// return null; +// +// size_t startIndex = findBeginningOfExpression(tokens, index); +// if (startIndex - 1 < tokens.length && tokens[startIndex - 1] == TokenType.Import) +// { +// return importComplete(splitCallChain(tokens[startIndex .. index])); +// } +// +// auto expressionType = getTypeOfExpression( +// splitCallChain(tokens[startIndex .. index]), tokens, cursor); +// +// stderr.writeln("expression type is ", expressionType); +// +// // Complete pointers and references the same way +// if (expressionType[$ - 1] == '*') +// expressionType = expressionType[0 .. $ - 1]; +// +// const Tuple!(string, string)[string] typeMap = context.getMembersOfType( +// expressionType); +// if (typeMap is null) +// return ""; +// auto app = appender!(string[])(); +// foreach (k, t; typeMap) +// app.put(k ~ " " ~ t[1]); +// return to!string(array(join(sort!("a.toLower() < b.toLower()")(app.data), "\n"))); +// } +// +// string importComplete(const(Token)[] tokens) +// { +// stderr.writeln("importComplete"); +// auto app = appender!(string[])(); +// string part = to!string(map!"a.value.dup"(tokens).join("/").array()); +// foreach (path; context.importDirectories) +// { +// stderr.writeln("Searching for ", path, "/", part); +// if (!exists(buildPath(path, part))) +// continue; +// stderr.writeln("found it"); +// foreach (DirEntry dirEntry; dirEntries(buildPath(path, part), +// SpanMode.shallow)) +// { +// if (dirEntry.isDir) +// app.put(baseName(dirEntry.name) ~ " P"); +// else if (dirEntry.name.endsWith(".d", ".di")) +// app.put(stripExtension(baseName(dirEntry.name)) ~ " M"); +// } +// } +// return to!string(sort!("a.toLower() < b.toLower()")(app.data).join("\n").array()); +// } +// +// const(Token)[] tokens; +// CompletionContext context; +//} +// +//unittest +//{ +// auto code = q{ +//struct TestStruct { int a; int b; } +//TestStruct ts; +//ts.a. 
+// }; +// +// auto tokens = tokenize(code); +// auto mod = parseModule(tokens); +// auto context = new CompletionContext(mod); +// auto completion = AutoComplete(tokens, context); +// assert (completion.getTypeOfExpression(splitCallChain(tokens[13 .. 16]), +// tokens, 56) == "int"); +//} diff --git a/build.sh b/build.sh index 2250998..f0f544f 100755 --- a/build.sh +++ b/build.sh @@ -1,2 +1,2 @@ -dmd *.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline -#dmd *.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest +dmd *.d std/d/*.d -release -noboundscheck -O -w -wi -m64 -property -ofdscanner -L-lsqlite3 #-inline +#dmd *.d std/d/*.d -g -m64 -w -wi -property -ofdscanner -L-lsqlite3 #-unittest diff --git a/cache.d b/cache.d index c2ea3fa..6398422 100644 --- a/cache.d +++ b/cache.d @@ -13,11 +13,11 @@ import std.uuid; import std.array; import std.string; import std.conv; +import std.d.lexer; import location; import parser; import types; -import tokenizer; private sqlite3* database; @@ -104,10 +104,10 @@ void updateCache(string dirs[], string moduleNames[]) if (timeLastModified.stdTime == mtime) continue; - // re-parse the module - Module m = parseModule(byToken(readText(filePath)).array()); - - updateCache(m); +// // re-parse the module +// Module m = parseModule(byToken(readText(filePath)).array()); +// +// updateCache(m); sqlite3_reset(statement); } diff --git a/circularbuffer.d b/circularbuffer.d index 714720a..9c45128 100644 --- a/circularbuffer.d +++ b/circularbuffer.d @@ -9,11 +9,12 @@ import std.math; import std.array; import std.range; -struct CircularBuffer(T, R) if (isInputRange!(R) && is (ElementType!(R) == T)) +class CircularBuffer(T) : InputRange!(T) + { public: - this (size_t size, R range) + this (size_t size, InputRange!(T) range) { this.range = range; this.margin = size; @@ -31,41 +32,33 @@ public: } } - T opIndex(size_t index) const - in - { - assert (index <= sourceIndex + margin); - assert (index >= 
sourceIndex - margin); - } - body - { - return data[index % data.length]; - } - - T front() const @property + override T front() const @property { return data[index]; } - T peek(int offset) + T peek(int offset = 1) in { - assert(abs(offset) <= margin); - assert(sourceIndex + offset >= 0); + assert(canPeek(offset)); } body { return data[(index + offset) % data.length]; } - T popFront() + bool canPeek(int offset = 1) + { + return abs(offset) <= margin && sourceIndex + offset >= 0; + } + + override void popFront() in { assert (!_empty); } body { - T v = data[index]; index = (index + 1) % data.length; ++sourceIndex; if (range.empty()) @@ -79,7 +72,6 @@ public: end = (end + 1) % data.length; range.popFront(); } - return v; } bool empty() const @property @@ -87,8 +79,40 @@ public: return _empty; } + override T moveFront() + { + auto r = front(); + popFront(); + return r; + } + + override int opApply(int delegate(T) dg) + { + int result = 0; + while (!empty) + { + result = dg(front); + if (result) + break; + } + return result; + } + + override int opApply(int delegate(size_t, T) dg) + { + int result = 0; + int i = 0; + while (!empty) + { + result = dg(i, front); + if (result) + break; + } + return result; + } + private: - R range; + InputRange!(T) range; immutable size_t margin; T[] data; size_t sourceIndex; @@ -123,8 +147,6 @@ unittest buf.popFront(); buf.popFront(); assert (buf.front == 4); - assert (buf[2] == 2); - assert (buf[6] == 6); } unittest diff --git a/codegen.d b/codegen.d deleted file mode 100644 index 822c5d3..0000000 --- a/codegen.d +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright Brian Schott (Sir Alaran) 2012. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// This module triggers DMD bug 7900 if compiled with -inline - -module codegen; - -import std.range; - - -class Trie(K, V) if (isInputRange!K): TrieNode!(K, V) -{ - /** - * Adds the given value to the trie with the given key - */ - void add(K key, V value) pure - { - TrieNode!(K,V) current = this; - foreach(keyPart; key) - { - if ((keyPart in current.children) is null) - { - auto node = new TrieNode!(K, V); - current.children[keyPart] = node; - current = node; - } - else - current = current.children[keyPart]; - } - current.value = value; - } -} - -class TrieNode(K, V) if (isInputRange!K) -{ - V value; - TrieNode!(K,V)[ElementType!K] children; -} - -string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString) -{ - string caseStatement = ""; - foreach(dchar k, TrieNode!(K,V) v; node.children) - { - caseStatement ~= indentString; - caseStatement ~= "case '"; - caseStatement ~= k; - caseStatement ~= "':\n"; - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.value ~= '"; - caseStatement ~= k; - caseStatement ~= "';\n"; - caseStatement ~= indentString; - caseStatement ~= "\t++index;\n"; - caseStatement ~= indentString; - caseStatement ~= "\trange.popFront();\n"; - if (v.children.length > 0) - { - caseStatement ~= indentString; - caseStatement ~= "\tif (range.isEoF())\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tbreak;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - caseStatement ~= indentString; - caseStatement ~= "\tswitch (range.front)\n"; - caseStatement ~= indentString; - caseStatement ~= "\t{\n"; - caseStatement ~= printCaseStatements(v, indentString ~ "\t"); - caseStatement ~= indentString; - caseStatement ~= 
"\tdefault:\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\t\tbreak;\n"; - caseStatement ~= indentString; - caseStatement ~= "\t}\n"; - caseStatement ~= indentString; - caseStatement ~= "\tbreak;\n"; - } - else - { - caseStatement ~= indentString; - caseStatement ~= "\tcurrent.type = "; - caseStatement ~= v.value; - caseStatement ~= ";\n"; - caseStatement ~= indentString; - caseStatement ~= "\tbreak;\n"; - } - } - return caseStatement; -} - -string generateCaseTrie(string[] args ...) -{ - auto t = new Trie!(string, string); - for(int i = 0; i < args.length; i+=2) - { - t.add(args[i], args[i+1]); - } - return printCaseStatements(t, ""); -} diff --git a/highlighter.d b/highlighter.d index 032c3f4..03286f7 100644 --- a/highlighter.d +++ b/highlighter.d @@ -7,8 +7,10 @@ module highlighter; import std.stdio; -import langutils; import std.array; +import std.d.lexer; + +import langutils; void writeSpan(string cssClass, string value) { @@ -23,13 +25,13 @@ void highlight(R)(R tokens)
]");
diff --git a/langutils.d b/langutils.d
index 1649f4a..cf0c995 100644
--- a/langutils.d
+++ b/langutils.d
@@ -4,7 +4,9 @@
// http://www.boost.org/LICENSE_1_0.txt)
module langutils;
+
import std.array;
+import std.d.lexer;
/**
@@ -33,18 +35,6 @@ pure nothrow bool isAttribute(TokenType input)
return input > TokenType.ATTRIBUTES_BEGIN && input < TokenType.ATTRIBUTES_END;
}
-/**
- * Returns: the token type for the given string. Defaults to "identifier"
- */
-pure nothrow TokenType lookupTokenType(const string input)
-{
- immutable(TokenType)* type = input in tokenLookup;
- if (type !is null)
- return *type;
- else
- return TokenType.Identifier;
-}
-
string combineTokens(ref const Token[] tokens)
{
auto app = appender!string();
@@ -53,533 +43,7 @@ string combineTokens(ref const Token[] tokens)
return app.data;
}
-pure nothrow TokenType lookupTokenTypeOptimized(const string input)
-{
- switch(input.length)
- {
- case 2:
- switch (input)
- {
- case "do": return TokenType.Do;
- case "if": return TokenType.If;
- case "in": return TokenType.In;
- case "is": return TokenType.Is;
- default: break;
- }
- break;
- case 3:
- switch (input)
- {
- case "asm": return TokenType.Asm;
- case "for": return TokenType.For;
- case "int": return TokenType.Int;
- case "new": return TokenType.New;
- case "out": return TokenType.Out;
- case "ref": return TokenType.Ref;
- case "try": return TokenType.Try;
- default: break;
- }
- break;
- case 4:
- switch (input)
- {
- case "auto": return TokenType.Auto;
- case "body": return TokenType.Body;
- case "bool": return TokenType.Bool;
- case "byte": return TokenType.Byte;
- case "case": return TokenType.Case;
- case "cast": return TokenType.Cast;
- case "cent": return TokenType.Cent;
- case "char": return TokenType.Char;
- case "else": return TokenType.Else;
- case "enum": return TokenType.Enum;
- case "goto": return TokenType.Goto;
- case "lazy": return TokenType.Lazy;
- case "long": return TokenType.Long;
- case "null": return TokenType.Null;
- case "pure": return TokenType.Pure;
- case "real": return TokenType.Real;
- case "this": return TokenType.This;
- case "true": return TokenType.True;
- case "uint": return TokenType.Uint;
- case "void": return TokenType.Void;
- case "with": return TokenType.With;
- default: break;
- }
- break;
- case 5:
- switch (input)
- {
- case "alias": return TokenType.Alias;
- case "align": return TokenType.Align;
- case "break": return TokenType.Break;
- case "catch": return TokenType.Catch;
- case "class": return TokenType.Class;
- case "const": return TokenType.Const;
- case "creal": return TokenType.Creal;
- case "dchar": return TokenType.Dchar;
- case "debug": return TokenType.Debug;
- case "false": return TokenType.False;
- case "final": return TokenType.Final;
- case "float": return TokenType.Float;
- case "inout": return TokenType.Inout;
- case "ireal": return TokenType.Ireal;
- case "macro": return TokenType.Macro;
- case "mixin": return TokenType.Mixin;
- case "scope": return TokenType.Scope;
- case "short": return TokenType.Short;
- case "super": return TokenType.Super;
- case "throw": return TokenType.Throw;
- case "ubyte": return TokenType.Ubyte;
- case "ucent": return TokenType.Ucent;
- case "ulong": return TokenType.Ulong;
- case "union": return TokenType.Union;
- case "wchar": return TokenType.Wchar;
- case "while": return TokenType.While;
- default: break;
- }
- break;
- case 6:
- switch (input)
- {
- case "assert": return TokenType.Assert;
- case "cfloat": return TokenType.Cfloat;
- case "delete": return TokenType.Delete;
- case "double": return TokenType.Double;
- case "export": return TokenType.Export;
- case "extern": return TokenType.Extern;
- case "ifloat": return TokenType.Ifloat;
- case "import": return TokenType.Import;
- case "module": return TokenType.Module;
- case "pragma": return TokenType.Pragma;
- case "public": return TokenType.Public;
- case "return": return TokenType.Return;
- case "shared": return TokenType.Shared;
- case "static": return TokenType.Static;
- case "string": return TokenType.String;
- case "struct": return TokenType.Struct;
- case "switch": return TokenType.Switch;
- case "typeid": return TokenType.Typeid;
- case "typeof": return TokenType.Typeof;
- case "ushort": return TokenType.Ushort;
- default: break;
- }
- break;
- case 7:
- switch (input)
- {
- case "cdouble": return TokenType.Cdouble;
- case "default": return TokenType.Default;
- case "dstring": return TokenType.DString;
- case "finally": return TokenType.Finally;
- case "foreach": return TokenType.Foreach;
- case "idouble": return TokenType.Idouble;
- case "nothrow": return TokenType.Nothrow;
- case "package": return TokenType.Package;
- case "private": return TokenType.Private;
- case "typedef": return TokenType.Typedef;
- case "version": return TokenType.Version;
- case "wstring": return TokenType.WString;
- default: break;
- }
- break;
- case 8:
- switch (input)
- {
- case "override": return TokenType.Override;
- case "continue": return TokenType.Continue;
- case "__LINE__": return TokenType.Line;
- case "template": return TokenType.Template;
- case "abstract": return TokenType.Abstract;
- case "__thread": return TokenType.Thread;
- case "__traits": return TokenType.Traits;
- case "volatile": return TokenType.Volatile;
- case "delegate": return TokenType.Delegate;
- case "function": return TokenType.Function;
- case "unittest": return TokenType.Unittest;
- case "__FILE__": return TokenType.File;
- default: break;
- }
- break;
- case 9:
- switch (input)
- {
- case "__gshared": return TokenType.Gshared;
- case "immutable": return TokenType.Immutable;
- case "interface": return TokenType.Interface;
- case "invariant": return TokenType.Invariant;
- case "protected": return TokenType.Protected;
- default: break;
- }
- break;
- case 10:
- if (input == "deprecated")
- return TokenType.Deprecated;
- break;
- case 11:
- if (input == "synchronized")
- return TokenType.Synchronized;
- break;
- case 13:
- if (input == "foreach_reverse")
- return TokenType.Foreach_reverse;
- break;
- default: break;
- }
- return TokenType.Identifier;
-}
-
-
-/**
- * Listing of all the tokens in the D language
- */
-enum TokenType: uint
-{
-// Operators
- OPERATORS_BEGIN,
- Assign, /// =
- At, /// @
- BitAnd, /// &
- BitAndEquals, /// &=
- BitOr, /// |
- BitOrEquals, /// |=
- CatEquals, /// ~=
- Colon, /// :
- Comma, /// ,
- Decrement, /// --
- Div, /// /
- DivEquals, /// /=
- Dollar, /// $
- Dot, /// .
- Equals, /// ==
- GoesTo, // =>
- Greater, /// >
- GreaterEqual, /// >=
- Hash, // #
- Increment, /// ++
- LBrace, /// {
- LBracket, /// [
- Less, /// <
- LessEqual, /// <=
- LessEqualGreater, // <>=
- LessOrGreater, /// <>
- LogicAnd, /// &&
- LogicOr, /// ||
- LParen, /// $(LPAREN)
- Minus, /// -
- MinusEquals, /// -=
- Mod, /// %
- ModEquals, /// %=
- MulEquals, /// *=
- Not, /// !
- NotEquals, /// !=
- NotGreater, /// !>
- NotGreaterEqual, /// !>=
- NotLess, /// !<
- NotLessEqual, /// !<=
- NotLessEqualGreater, /// !<>
- Plus, /// +
- PlusEquals, /// +=
- Pow, /// ^^
- PowEquals, /// ^^=
- RBrace, /// }
- RBracket, /// ]
- RParen, /// $(RPAREN)
- Semicolon, /// ;
- ShiftLeft, /// <<
- ShiftLeftEqual, /// <<=
- ShiftRight, /// >>
- ShiftRightEqual, /// >>=
- Slice, // ..
- Star, /// *
- Ternary, /// ?
- Tilde, /// ~
- Unordered, /// !<>=
- UnsignedShiftRight, /// >>>
- UnsignedShiftRightEqual, /// >>>=
- Vararg, /// ...
- Xor, /// ^
- XorEquals, /// ^=
- OPERATORS_END,
-
- // Types
- TYPES_BEGIN,
- Bool, /// bool,
- Byte, /// byte,
- Cdouble, /// cdouble,
- Cent, /// cent,
- Cfloat, /// cfloat,
- Char, /// char,
- Creal, /// creal,
- Dchar, /// dchar,
- Double, /// double,
- DString, /// dstring
- Float, /// float,
- Function, /// function,
- Idouble, /// idouble,
- Ifloat, /// ifloat,
- Int, /// int,
- Ireal, /// ireal,
- Long, /// long,
- Real, /// real,
- Short, /// short,
- String, /// string
- Ubyte, /// ubyte,
- Ucent, /// ucent,
- Uint, /// uint,
- Ulong, /// ulong,
- Ushort, /// ushort,
- Void, /// void,
- Wchar, /// wchar,
- WString, /// wstring
- TYPES_END,
- Template, /// template,
-
- // Keywords
- KEYWORDS_BEGIN,
- ATTRIBUTES_BEGIN,
- Align, /// align,
- Deprecated, /// deprecated,
- Extern, /// extern,
- Pragma, /// pragma,
- PROTECTION_BEGIN,
- Export, /// export,
- Package, /// package,
- Private, /// private,
- Protected, /// protected,
- Public, /// public,
- PROTECTION_END,
- Abstract, /// abstract,
- AtDisable, /// @disable
- Auto, /// auto,
- Const, /// const,
- Final, /// final
- Gshared, /// __gshared,
- Immutable, // immutable,
- Inout, // inout,
- Scope, /// scope,
- Shared, // shared,
- Static, /// static,
- Synchronized, /// synchronized,
- ATTRIBUTES_END,
- Alias, /// alias,
- Asm, /// asm,
- Assert, /// assert,
- Body, /// body,
- Break, /// break,
- Case, /// case,
- Cast, /// cast,
- Catch, /// catch,
- Class, /// class,
- Continue, /// continue,
- Debug, /// debug,
- Default, /// default,
- Delegate, /// delegate,
- Delete, /// delete,
- Do, /// do,
- Else, /// else,
- Enum, /// enum,
- False, /// false,
- Finally, /// finally,
- Foreach, /// foreach,
- Foreach_reverse, /// foreach_reverse,
- For, /// for,
- Goto, /// goto,
- If, /// if ,
- Import, /// import,
- In, /// in,
- Interface, /// interface,
- Invariant, /// invariant,
- Is, /// is,
- Lazy, /// lazy,
- Macro, /// macro,
- Mixin, /// mixin,
- Module, /// module,
- New, /// new,
- Nothrow, /// nothrow,
- Null, /// null,
- Out, /// out,
- Override, /// override,
- Pure, /// pure,
- Ref, /// ref,
- Return, /// return,
- Struct, /// struct,
- Super, /// super,
- Switch, /// switch ,
- This, /// this,
- Throw, /// throw,
- True, /// true,
- Try, /// try,
- Typedef, /// typedef,
- Typeid, /// typeid,
- Typeof, /// typeof,
- Union, /// union,
- Unittest, /// unittest,
- Version, /// version,
- Volatile, /// volatile,
- While, /// while ,
- With, /// with,
- KEYWORDS_END,
-
-// Constants
- CONSTANTS_BEGIN,
- File, /// __FILE__,
- Line, /// __LINE__,
- Thread, /// __thread,
- Traits, /// __traits,
- CONSTANTS_END,
-
-// Misc
- MISC_BEGIN,
- Blank, /// unknown token type
- Comment, /// /** comment */ or // comment or ///comment
- Identifier, /// anything else
- ScriptLine, // Line at the beginning of source file that starts from #!
- Whitespace, /// whitespace
- NUMBERS_BEGIN,
- DoubleLiteral, /// 123.456
- FloatLiteral, /// 123.456f or 0x123_45p-af
- IDoubleLiteral, /// 123.456i
- IFloatLiteral, /// 123.456fi
- IntLiteral, /// 123 or 0b1101010101
- LongLiteral, /// 123L
- RealLiteral, /// 123.456L
- IRealLiteral, /// 123.456Li
- UnsignedIntLiteral, /// 123u
- UnsignedLongLiteral, /// 123uL
- NUMBERS_END,
- STRINGS_BEGIN,
- DStringLiteral, /// "32-bit character string"d
- StringLiteral, /// "a string"
- WStringLiteral, /// "16-bit character string"w
- STRINGS_END,
- MISC_END,
-}
-
-
-/**
- * lookup table for converting strings to tokens
- */
-immutable TokenType[string] tokenLookup;
-
-
-static this()
-{
- tokenLookup = [
- "abstract" : TokenType.Abstract,
- "alias" : TokenType.Alias,
- "align" : TokenType.Align,
- "asm" : TokenType.Asm,
- "assert" : TokenType.Assert,
- "auto" : TokenType.Auto,
- "body" : TokenType.Body,
- "bool" : TokenType.Bool,
- "break" : TokenType.Break,
- "byte" : TokenType.Byte,
- "case" : TokenType.Case,
- "cast" : TokenType.Cast,
- "catch" : TokenType.Catch,
- "cdouble" : TokenType.Cdouble,
- "cent" : TokenType.Cent,
- "cfloat" : TokenType.Cfloat,
- "char" : TokenType.Char,
- "class" : TokenType.Class,
- "const" : TokenType.Const,
- "continue" : TokenType.Continue,
- "creal" : TokenType.Creal,
- "dchar" : TokenType.Dchar,
- "debug" : TokenType.Debug,
- "default" : TokenType.Default,
- "delegate" : TokenType.Delegate,
- "delete" : TokenType.Delete,
- "deprecated" : TokenType.Deprecated,
- "do" : TokenType.Do,
- "double" : TokenType.Double,
- "dstring" : TokenType.DString,
- "else" : TokenType.Else,
- "enum" : TokenType.Enum,
- "export" : TokenType.Export,
- "extern" : TokenType.Extern,
- "false" : TokenType.False,
- "__FILE__" : TokenType.File,
- "finally" : TokenType.Finally,
- "final" : TokenType.Final,
- "float" : TokenType.Float,
- "foreach_reverse" : TokenType.Foreach_reverse,
- "foreach" : TokenType.Foreach,
- "for" : TokenType.For,
- "function" : TokenType.Function,
- "goto" : TokenType.Goto,
- "__gshared" : TokenType.Gshared,
- "idouble" : TokenType.Idouble,
- "ifloat" : TokenType.Ifloat,
- "if" : TokenType.If,
- "immutable" : TokenType.Immutable,
- "import" : TokenType.Import,
- "inout" : TokenType.Inout,
- "interface" : TokenType.Interface,
- "in" : TokenType.In,
- "int" : TokenType.Int,
- "invariant" : TokenType.Invariant,
- "ireal" : TokenType.Ireal,
- "is" : TokenType.Is,
- "lazy" : TokenType.Lazy,
- "__LINE__" : TokenType.Line,
- "long" : TokenType.Long,
- "macro" : TokenType.Macro,
- "mixin" : TokenType.Mixin,
- "module" : TokenType.Module,
- "new" : TokenType.New,
- "nothrow" : TokenType.Nothrow,
- "null" : TokenType.Null,
- "out" : TokenType.Out,
- "override" : TokenType.Override,
- "package" : TokenType.Package,
- "pragma" : TokenType.Pragma,
- "private" : TokenType.Private,
- "protected" : TokenType.Protected,
- "public" : TokenType.Public,
- "pure" : TokenType.Pure,
- "real" : TokenType.Real,
- "ref" : TokenType.Ref,
- "return" : TokenType.Return,
- "scope" : TokenType.Scope,
- "shared" : TokenType.Shared,
- "short" : TokenType.Short,
- "static" : TokenType.Static,
- "string" : TokenType.String,
- "struct" : TokenType.Struct,
- "super" : TokenType.Super,
- "switch" : TokenType.Switch,
- "synchronized" : TokenType.Synchronized,
- "template" : TokenType.Template,
- "this" : TokenType.This,
- "__thread" : TokenType.Thread,
- "throw" : TokenType.Throw,
- "__traits" : TokenType.Traits,
- "true" : TokenType.True,
- "try" : TokenType.Try,
- "typedef" : TokenType.Typedef,
- "typeid" : TokenType.Typeid,
- "typeof" : TokenType.Typeof,
- "ubyte" : TokenType.Ubyte,
- "ucent" : TokenType.Ucent,
- "uint" : TokenType.Uint,
- "ulong" : TokenType.Ulong,
- "union" : TokenType.Union,
- "unittest" : TokenType.Unittest,
- "ushort" : TokenType.Ushort,
- "version" : TokenType.Version,
- "void" : TokenType.Void,
- "volatile" : TokenType.Volatile,
- "wchar" : TokenType.Wchar,
- "while" : TokenType.While,
- "with" : TokenType.With,
- "wstring" : TokenType.WString,
- ];
-}
-
-pure string getTypeFromToken(ref const Token t)
+pure string getTypeFromToken(const Token t)
{
switch (t.type)
{
@@ -609,56 +73,8 @@ pure string getTypeFromToken(ref const Token t)
}
}
-pure bool isIdentifierOrType(ref const Token t)
+pure bool isIdentifierOrType(inout Token t)
{
return t.type == TokenType.Identifier || (t.type > TokenType.TYPES_BEGIN
&& TokenType.TYPES_END);
}
-
-/**
- * Token structure
- */
-struct Token
-{
- /// The token type
- TokenType type;
-
- /// The representation of the token in the original source code
- string value;
-
- /// The number of the line the token is on
- uint lineNumber;
-
- /// The character index of the start of the token in the original text
- uint startIndex;
-
- /**
- * Check to see if the token is of the same type and has the same string
- * representation as the given token
- */
- bool opEquals(ref const(Token) other) const
- {
- return other.type == type && other.value == value;
- }
-
- /**
- * Checks to see if the token's string representation is equal to the given
- * string
- */
- bool opEquals(string range) const { return range == value; }
-
- /**
- * Checks to see if the token is of the given type
- */
- bool opEquals(TokenType t) const { return type == t; }
-
- /**
- * Comparison operator orders by start index
- */
- int opCmp(size_t i) const
- {
- if (startIndex < i) return -1;
- if (startIndex > i) return 1;
- return 0;
- }
-}
diff --git a/main.d b/main.d
index 39e3488..848cd0a 100644
--- a/main.d
+++ b/main.d
@@ -15,13 +15,18 @@ import std.parallelism;
import std.path;
import std.regex;
import std.stdio;
+import std.d.lexer;
+
import autocomplete;
import highlighter;
import langutils;
import location;
import parser;
-import tokenizer;
+
import types;
+import circularbuffer;
+
+immutable size_t CIRC_BUFF_SIZE = 4;
pure bool isLineOfCode(TokenType t)
{
@@ -100,9 +105,9 @@ int main(string[] args)
{
string[] importDirs;
bool sloc;
- bool dotComplete;
+ /+bool dotComplete;+/
bool json;
- bool parenComplete;
+ /+bool parenComplete;+/
bool highlight;
bool ctags;
bool recursiveCtags;
@@ -111,8 +116,8 @@ int main(string[] args)
try
{
- getopt(args, "I", &importDirs, "dotComplete", &dotComplete, "sloc", &sloc,
- "json", &json, "parenComplete", &parenComplete, "highlight", &highlight,
+ getopt(args, "I", &importDirs,/+ "dotComplete", &dotComplete,+/ "sloc", &sloc,
+ "json", &json, /+"parenComplete", &parenComplete,+/ "highlight", &highlight,
"ctags", &ctags, "recursive|r|R", &recursiveCtags, "help|h", &help);
}
catch (Exception e)
@@ -120,7 +125,7 @@ int main(string[] args)
stderr.writeln(e.msg);
}
- if (help || (!sloc && !dotComplete && !json && !parenComplete && !highlight
+ if (help || (!sloc && /+!dotComplete &&+/ !json /+&& !parenComplete+/ && !highlight
&& !ctags && !format))
{
printHelp();
@@ -166,7 +171,7 @@ int main(string[] args)
return 0;
}
- if (dotComplete || parenComplete)
+ /+if (dotComplete || parenComplete)
{
if (isAbsolute(args[1]))
importDirs ~= dirName(args[1]);
@@ -203,11 +208,11 @@ int main(string[] args)
else if (dotComplete)
writeln(complete.dotComplete(to!size_t(args[1])));
return 0;
- }
+ }+/
if (json)
{
- Token[] tokens;
+ CircularBuffer!(Token) tokens;
if (args.length == 1)
{
// Read from stdin
@@ -215,46 +220,46 @@ int main(string[] args)
char[] buf;
while (stdin.readln(buf))
f.put(buf);
- tokens = byToken(f.data).array();
+ tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(f.data));
}
else
{
// read given file
- tokens = byToken(readText(args[1])).array();
+ tokens = new CircularBuffer!(Token)(CIRC_BUFF_SIZE, byToken!string(readText(args[1])));
}
auto mod = parseModule(tokens);
mod.writeJSONTo(stdout);
return 0;
}
- if (ctags)
- {
- if (!recursiveCtags)
- {
- auto tokens = byToken(readText(args[1]));
- auto mod = parseModule(tokens.array());
- mod.writeCtagsTo(stdout, args[1]);
- }
- else
- {
- Module m;
- foreach (dirEntry; dirEntries(args[1], SpanMode.breadth))
- {
- if (!dirEntry.name.endsWith(".d", ".di"))
- continue;
- stderr.writeln("Generating tags for ", dirEntry.name);
- auto tokens = byToken(readText(dirEntry.name));
- if (m is null)
- m = parseModule(tokens.array());
- else
- {
- auto mod = parseModule(tokens.array());
- m.merge(mod);
- }
- }
- m.writeCtagsTo(stdout, "");
- }
- }
+// if (ctags)
+// {
+// if (!recursiveCtags)
+// {
+// auto tokens = byToken(readText(args[1]));
+// auto mod = parseModule(tokens.array());
+// mod.writeCtagsTo(stdout, args[1]);
+// }
+// else
+// {
+// Module m;
+// foreach (dirEntry; dirEntries(args[1], SpanMode.breadth))
+// {
+// if (!dirEntry.name.endsWith(".d", ".di"))
+// continue;
+// stderr.writeln("Generating tags for ", dirEntry.name);
+// auto tokens = byToken(readText(dirEntry.name));
+// if (m is null)
+// m = parseModule(tokens.array());
+// else
+// {
+// auto mod = parseModule(tokens.array());
+// m.merge(mod);
+// }
+// }
+// m.writeCtagsTo(stdout, "");
+// }
+// }
return 0;
}
diff --git a/parser.d b/parser.d
index 1db9481..70498a8 100644
--- a/parser.d
+++ b/parser.d
@@ -10,137 +10,163 @@ import std.stream;
import std.array;
import std.stdio;
import std.algorithm;
+import std.range;
+import std.d.lexer;
-import types, tokenizer;
+import types;
import langutils;
+import circularbuffer;
+alias CircularBuffer!Token TokenBuffer;
+
+class Balanced : TokenBuffer
+{
+public:
+
+ this(InputRange!Token tokens, TokenType open, TokenType close)
+ {
+ super(0, tokens);
+ this.open = open;
+ this.close = close;
+ }
+
+ override bool empty() @property
+ {
+ return _empty;
+ }
+
+ override Token front() const @property
+ {
+ return range.front;
+ }
+
+ override void popFront()
+ {
+ range.popFront();
+ if (range.front == open)
+ ++depth;
+ else if (range.front == close)
+ --depth;
+ _empty = depth == 0;
+ }
+
+private:
+ int depth;
+ TokenType open;
+ TokenType close;
+ TokenBuffer range;
+ bool _empty;
+}
/**
* Params:
* tokens = the array of tokens
- * index = an index into tokens such that tokens[index].type == open
+ * index = an index into tokens such that tokens.front.type == open
* open = the opening delimiter
* close = the closing delimiter
* Returns: all tokens that are between the balanced delimiters that start at
- * tokens[index], not including the delimiters. If the delimiters in tokens
+ * tokens.front, not including the delimiters. If the delimiters in tokens
* are not balanced, this function will return tokens[index + 1 .. $];
*/
-const(Token)[] betweenBalanced(const Token[] tokens, ref size_t index, TokenType open,
- TokenType close)
+Balanced betweenBalanced(TokenBuffer tokens,
+ TokenType open, TokenType close)
in
{
- assert (tokens[index] == open);
+ assert (tokens.front == open);
+}
+body
+{
+ return new Balanced(tokens, open, close);
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+Balanced betweenBalancedBraces(TokenBuffer tokens)
+{
+ return betweenBalanced(tokens, TokenType.LBrace, TokenType.RBrace);
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+Balanced betweenBalancedParens(TokenBuffer tokens)
+{
+ return betweenBalanced(tokens, TokenType.LParen, TokenType.RParen);
+}
+
+
+/**
+ * See_also: betweenBalanced
+ */
+Balanced betweenBalancedBrackets(TokenBuffer tokens)
+{
+ return betweenBalanced(tokens, TokenType.LBracket, TokenType.RBracket);
+}
+
+void skipBalanced(alias openToken, alias closeToken)(TokenBuffer tokens)
+in
+{
+ assert (tokens.front == openToken);
}
body
{
- ++index;
- size_t start = index;
int depth = 1;
- while (depth > 0 && index < tokens.length)
+ tokens.popFront();
+ while (!tokens.empty && depth != 0)
{
- if (tokens[index] == open) ++depth;
- else if (tokens[index] == close) --depth;
- ++index;
- }
- return tokens[start .. index - 1];
-}
-
-
-/**
- * See_also: betweenBalanced
- */
-const(Token)[] betweenBalancedBraces(const Token[] tokens, ref size_t index)
-{
- return betweenBalanced(tokens, index, TokenType.LBrace, TokenType.RBrace);
-}
-
-
-/**
- * See_also: betweenBalanced
- */
-const(Token)[] betweenBalancedParens(const Token[] tokens, ref size_t index)
-{
- return betweenBalanced(tokens, index, TokenType.LParen, TokenType.RParen);
-}
-
-
-/**
- * See_also: betweenBalanced
- */
-const(Token)[] betweenBalancedBrackets(const Token[] tokens, ref size_t index)
-{
- return betweenBalanced(tokens, index, TokenType.LBracket, TokenType.RBracket);
-}
-
-
-/**
- * If tokens[index] is currently openToken, advances index until it refers to a
- * location in tokens directly after the balanced occurance of closeToken. If
- * tokens[index] is closeToken, decrements index
- *
- */
-void skipBalanced(alias openToken, alias closeToken)(const Token[] tokens, ref size_t index)
-{
- int depth = tokens[index] == openToken ? 1 : -1;
- int deltaIndex = depth;
- index += deltaIndex;
- for (; index < tokens.length && index > 0 && depth != 0; index += deltaIndex)
- {
- switch (tokens[index].type)
+ switch (tokens.front.type)
{
case openToken: ++depth; break;
case closeToken: --depth; break;
default: break;
}
+ tokens.popFront();
}
}
-void skipParens(const Token[] tokens, ref size_t index)
+void skipParens(TokenBuffer tokens)
{
- skipBalanced!(TokenType.LParen, TokenType.RParen)(tokens, index);
+ skipBalanced!(TokenType.LParen, TokenType.RParen)(tokens);
}
-void skipBrackets(const Token[] tokens, ref size_t index)
+void skipBrackets(TokenBuffer tokens)
{
- skipBalanced!(TokenType.LBracket, TokenType.RBracket)(tokens, index);
+ skipBalanced!(TokenType.LBracket, TokenType.RBracket)(tokens);
}
-void skipBraces(const Token[] tokens, ref size_t index)
+void skipBraces(TokenBuffer tokens)
{
- skipBalanced!(TokenType.LBrace, TokenType.RBrace)(tokens, index);
+ skipBalanced!(TokenType.LBrace, TokenType.RBrace)(tokens);
}
/**
* Params:
* tokens = the token array to examine
- * index = an indext into tokens such that tokens[index].type == open
+ * index = an index into tokens such that tokens.front.type == open
* open = the opening delimiter
* close = the closing delimiter
* Returns: a string representing the contents of the two delimiters. This will
* not preserve whitespace, but it will place a single space character after
* a comma and between identifiers.
*/
-string content(const Token[] tokens, ref size_t index, TokenType open, TokenType close)
+string content(TokenBuffer tokens, TokenType open, TokenType close)
in
{
- assert (tokens[index] == open);
+ assert (tokens.front == open);
}
body
{
- index++;
auto app = appender!string();
int depth = 1;
- while (depth > 0 && index < tokens.length)
+ foreach (t; betweenBalanced(tokens, open, close))
{
- if (tokens[index] == open) ++depth;
- else if (tokens[index] == close) --depth;
- else if (tokens[index] == TokenType.Comma)
- {
+ if (t == TokenType.Comma)
app.put(", ");
- }
else
- app.put(tokens[index].value);
- ++index;
+ app.put(t.value);
}
return app.data;
}
@@ -149,18 +175,18 @@ body
/**
* See_also: content
*/
-string parenContent(const Token[]tokens, ref size_t index)
+string parenContent(TokenBuffer tokens)
{
- return "(" ~ content(tokens, index, TokenType.LParen, TokenType.RParen) ~ ")";
+ return "(" ~ content(tokens, TokenType.LParen, TokenType.RParen) ~ ")";
}
/**
* See_also: content
*/
-string bracketContent(const Token[]tokens, ref size_t index)
+string bracketContent(TokenBuffer tokens)
{
- return "[" ~ content(tokens, index, TokenType.LBracket, TokenType.RBracket) ~ "]";
+ return "[" ~ content(tokens, TokenType.LBracket, TokenType.RBracket) ~ "]";
}
@@ -169,14 +195,12 @@ string bracketContent(const Token[]tokens, ref size_t index)
* index initially indexed a right brace, or advances index until it indexes a
* character after a simicolon otherwise.
*/
-void skipBlockStatement(const Token[] tokens, ref size_t index)
+void skipBlockStatement(TokenBuffer tokens)
{
- if (tokens[index] == TokenType.LBrace)
- betweenBalancedBraces(tokens, index);
+ if (tokens.front == TokenType.LBrace)
+ skipBraces(tokens);
else
- {
- skipPastNext(tokens, TokenType.Semicolon, index);
- }
+ skipPastNext(tokens, TokenType.Semicolon);
}
@@ -185,50 +209,57 @@ void skipBlockStatement(const Token[] tokens, ref size_t index)
* of type type. This function handles nesting of braces, brackets, and
* parenthesis
*/
-void skipPastNext(const Token[] tokens, TokenType type, ref size_t index)
+void skipPastNext(TokenBuffer tokens, TokenType type)
{
- while (index < tokens.length)
+ while (!tokens.empty)
{
- if (tokens[index].type == TokenType.LBrace)
- betweenBalancedBraces(tokens, index);
- else if (tokens[index].type == TokenType.LParen)
- betweenBalancedParens(tokens, index);
- else if (tokens[index].type == TokenType.LBracket)
- betweenBalancedBrackets(tokens, index);
- else if (tokens[index].type == type)
+ if (tokens.front.type == TokenType.LBrace)
+ skipBraces(tokens);
+ else if (tokens.front.type == TokenType.LParen)
+ skipParens(tokens);
+ else if (tokens.front.type == TokenType.LBracket)
+ skipBrackets(tokens);
+ else if (tokens.front.type == type)
{
- ++index;
+ tokens.popFront();
return;
}
else
- ++index;
+ tokens.popFront();
}
}
-string parseTypeDeclaration(const Token[] tokens, ref size_t index)
+string parseTypeDeclaration(TokenBuffer tokens)
{
- auto type = tokens[index++].value.idup;
- buildingType: while (index < tokens.length)
+ auto type = tokens.front.value;
+ tokens.popFront();
+ buildingType: while (!tokens.empty)
{
- switch (tokens[index].type)
+ switch (tokens.front.type)
{
case TokenType.LBracket:
- type ~= bracketContent(tokens, index);
+ type ~= bracketContent(tokens);
break;
case TokenType.Not:
- type ~= tokens[index++].value;
- if (tokens[index] == TokenType.LParen)
- type ~= parenContent(tokens, index);
+ type ~= tokens.front.value;
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
+ type ~= parenContent(tokens);
else
- type ~= tokens[index++].value;
+ {
+ type ~= tokens.front.value;
+ tokens.popFront();
+ }
break;
case TokenType.Star:
case TokenType.BitAnd:
- type ~= tokens[index++].value;
+ type ~= tokens.front.value;
+ tokens.popFront();
break;
case TokenType.Function:
- type ~= " " ~ tokens[index++].value;
- type ~= parenContent(tokens, index);
+ type ~= " " ~ tokens.front.value;
+ tokens.popFront();
+ type ~= parenContent(tokens);
break;
default:
break buildingType;
@@ -244,7 +275,7 @@ string parseTypeDeclaration(const Token[] tokens, ref size_t index)
* attributes = the default attributes for a block statement
* Returns: the parsed module
*/
-Module parseModule(const Token[] tokens, string protection = "public", string[] attributes = [])
+Module parseModule(TokenBuffer tokens, string protection = "public", string[] attributes = [])
{
string type;
string name;
@@ -261,42 +292,45 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
Module mod = new Module;
size_t index = 0;
- while(index < tokens.length)
+ while(!tokens.empty)
{
- switch(tokens[index].type)
+ switch(tokens.front.type)
{
- case TokenType.Pragma:
- ++index;
- if (tokens[index] == TokenType.LParen)
- skipParens(tokens, index);
- break;
+ case TokenType.Pragma:
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
+ skipParens(tokens);
+ break;
case TokenType.Mixin:
case TokenType.Assert:
- ++index;
- tokens.skipBlockStatement(index);
+ tokens.popFront();
+ tokens.skipBlockStatement();
break;
case TokenType.Alias:
- Alias a = parseAlias(tokens, index,
+ Alias a = parseAlias(tokens,
localProtection.empty() ? protection : localProtection,
attributes);
mod.aliases ~= a;
break;
case TokenType.Import:
- mod.imports ~= parseImports(tokens, index);
+ mod.imports ~= parseImports(tokens);
resetLocals();
break;
case TokenType.Version:
- ++index;
- if (tokens[index] == TokenType.LParen)
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
{
- tokens.betweenBalancedParens(index);
- if (tokens[index] == TokenType.LBrace)
- mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+ tokens.betweenBalancedParens();
+ if (tokens.front == TokenType.LBrace)
+ {
+ auto braceContent = tokens.betweenBalancedBraces();
+ mod.merge(parseModule(braceContent,
localProtection.empty() ? protection : localProtection,
attributes));
+ }
}
- else if (tokens[index] == TokenType.Assign)
- tokens.skipBlockStatement(index);
+ else if (tokens.front == TokenType.Assign)
+ tokens.skipBlockStatement();
break;
case TokenType.Deprecated:
case TokenType.Nothrow:
@@ -306,7 +340,8 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
case TokenType.Final:
case TokenType.Gshared:
case TokenType.Static:
- localAttributes ~= tokens[index++].value;
+ localAttributes ~= tokens.front.value;
+ tokens.popFront();
break;
case TokenType.Const:
case TokenType.Immutable:
@@ -314,10 +349,11 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
case TokenType.Pure:
case TokenType.Scope:
case TokenType.Shared:
- auto tmp = tokens[index++].value;
- if (tokens[index] == TokenType.LParen)
- type = tmp ~ parenContent(tokens, index);
- else if (tokens[index] == TokenType.Colon)
+ auto tmp = tokens.front.value;
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
+ type = tmp ~ tokens.parenContent();
+ else if (tokens.front == TokenType.Colon)
{
index++;
attributes ~= tmp;
@@ -326,78 +362,83 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
break;
case TokenType.Align:
case TokenType.Extern:
- string attribute = tokens[index++].value;
- if (tokens[index] == TokenType.LParen)
- attribute ~= parenContent(tokens, index);
- if (tokens[index] == TokenType.LBrace)
- mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+ string attribute = tokens.front.value;
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
+ attribute ~= parenContent(tokens);
+ if (tokens.front == TokenType.LBrace)
+ mod.merge(parseModule(tokens.betweenBalancedBraces(),
localProtection.empty() ? protection : localProtection,
attributes ~ attribute));
- else if (tokens[index] == TokenType.Colon)
+ else if (tokens.front == TokenType.Colon)
{
- ++index;
+ tokens.popFront();
attributes ~= attribute;
}
else
localAttributes ~= attribute;
break;
case TokenType.PROTECTION_BEGIN: .. case TokenType.PROTECTION_END:
- string p = tokens[index++].value;
- if (tokens[index] == TokenType.Colon)
+ string p = tokens.front.value;
+ tokens.popFront();
+ if (tokens.front == TokenType.Colon)
{
protection = p;
- ++index;
+ tokens.popFront();
}
- else if (tokens[index] == TokenType.LBrace)
- mod.merge(parseModule(betweenBalancedBraces(tokens, index),
+ else if (tokens.front == TokenType.LBrace)
+ mod.merge(parseModule(tokens.betweenBalancedBraces(),
p, attributes ~ localAttributes));
else
localProtection = p;
break;
case TokenType.Module:
- ++index;
- while (index < tokens.length && tokens[index] != TokenType.Semicolon)
- mod.name ~= tokens[index++].value;
- ++index;
+ tokens.popFront();
+ while (!tokens.empty && tokens.front != TokenType.Semicolon)
+ {
+ mod.name ~= tokens.front.value;
+ tokens.popFront();
+ }
+ tokens.popFront();
resetLocals();
break;
case TokenType.Union:
- mod.unions ~= parseUnion(tokens, index,
+ mod.unions ~= parseUnion(tokens,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
resetLocals();
break;
case TokenType.Class:
- mod.classes ~= parseClass(tokens, index,
+ mod.classes ~= parseClass(tokens,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
resetLocals();
break;
case TokenType.Interface:
- mod.interfaces ~= parseInterface(tokens, index,
+ mod.interfaces ~= parseInterface(tokens,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
resetLocals();
break;
case TokenType.Struct:
- mod.structs ~= parseStruct(tokens, index,
+ mod.structs ~= parseStruct(tokens,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
resetLocals();
break;
case TokenType.Enum:
- mod.enums ~= parseEnum(tokens, index,
+ mod.enums ~= parseEnum(tokens,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
resetLocals();
break;
case TokenType.Template:
- ++index; // template
- ++index; // name
- if (tokens[index] == TokenType.LParen)
- tokens.betweenBalancedParens(index); // params
- if (tokens[index] == TokenType.LBrace)
- tokens.betweenBalancedBraces(index); // body
+ tokens.popFront(); // template
+ tokens.popFront(); // name
+ if (tokens.front == TokenType.LParen)
+ tokens.betweenBalancedParens(); // params
+ if (tokens.front == TokenType.LBrace)
+ tokens.betweenBalancedBraces(); // body
resetLocals();
break;
case TokenType.TYPES_BEGIN: .. case TokenType.TYPES_END:
@@ -405,16 +446,17 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
case TokenType.Identifier:
if (type.empty())
{
- type = tokens.parseTypeDeclaration(index);
+ type = tokens.parseTypeDeclaration();
}
else
{
- name = tokens[index++].value;
- if (index >= tokens.length) break;
- if (tokens[index] == TokenType.LParen)
+ name = tokens.front.value;
+ tokens.popFront();
+ if (tokens.empty) break;
+ if (tokens.front == TokenType.LParen)
{
- mod.functions ~= parseFunction(tokens, index, type, name,
- tokens[index].lineNumber,
+ mod.functions ~= parseFunction(tokens, type, name,
+ tokens.front.lineNumber,
localProtection.empty() ? protection : localProtection,
attributes ~ localAttributes);
}
@@ -425,39 +467,40 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
v.type = type;
v.attributes = localAttributes ~ attributes;
v.protection = localProtection.empty() ? protection : localProtection;
- v.line = tokens[index].lineNumber;
+ v.line = tokens.front.lineNumber;
mod.variables ~= v;
}
resetLocals();
}
break;
case TokenType.Unittest:
- ++index;
- if (!tokens.empty() && tokens[index] == TokenType.LBrace)
- tokens.skipBlockStatement(index);
+ tokens.popFront();
+ if (!tokens.empty() && tokens.front == TokenType.LBrace)
+ tokens.skipBlockStatement();
resetLocals();
break;
case TokenType.Tilde:
- ++index;
- if (tokens[index] == TokenType.This)
+ tokens.popFront();
+ if (tokens.front == TokenType.This)
{
name = "~";
goto case;
}
break;
case TokenType.This:
- name ~= tokens[index++].value;
- if (index < tokens.length && tokens[index] == TokenType.LParen)
+ name ~= tokens.front.value;
+ tokens.popFront();
+ if (!tokens.empty && tokens.front == TokenType.LParen)
{
- mod.functions ~= parseFunction(tokens, index, "", name,
- tokens[index - 1].lineNumber,
+ mod.functions ~= parseFunction(tokens, "", name,
+ tokens.peek(-1).lineNumber,
localProtection.empty() ? protection : localProtection,
localAttributes ~ attributes);
}
resetLocals();
break;
default:
- ++index;
+ tokens.popFront();
break;
}
}
@@ -470,32 +513,33 @@ Module parseModule(const Token[] tokens, string protection = "public", string[]
* Returns: only the module names that were imported, not which symbols were
* selectively improted.
*/
-string[] parseImports(const Token[] tokens, ref size_t index)
+string[] parseImports(TokenBuffer tokens)
{
- assert(tokens[index] == TokenType.Import);
- ++index;
+ assert(tokens.front == TokenType.Import);
+ tokens.popFront();
auto app = appender!(string[])();
string im;
- while (index < tokens.length)
+ while (!tokens.empty)
{
- switch(tokens[index].type)
+ switch(tokens.front.type)
{
case TokenType.Comma:
- ++index;
+ tokens.popFront();
app.put(im);
im = "";
break;
case TokenType.Assign:
case TokenType.Semicolon:
app.put(im);
- ++index;
+ tokens.popFront();
return app.data;
case TokenType.Colon:
app.put(im);
- tokens.skipBlockStatement(index);
+ tokens.skipBlockStatement();
return app.data;
default:
- im ~= tokens[index++].value;
+ im ~= tokens.front.value;
+ tokens.popFront();
break;
}
}
@@ -506,92 +550,98 @@ string[] parseImports(const Token[] tokens, ref size_t index)
/**
* Parses an enum declaration
*/
-Enum parseEnum(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Enum parseEnum(TokenBuffer tokens, string protection, string[] attributes)
in
{
- assert (tokens[index] == TokenType.Enum);
+ assert (tokens.front == TokenType.Enum);
}
body
{
Enum e = new Enum;
- e.line = tokens[index].lineNumber;
- ++index;
+ e.line = tokens.front.lineNumber;
+ tokens.popFront();
string enumType;
e.protection = protection;
- if (tokens[index] == TokenType.LBrace)
+ if (tokens.front == TokenType.LBrace)
goto enumBody;
- if (isIdentifierOrType(tokens[index]))
+ if (isIdentifierOrType(tokens.front))
{
- if (index + 1 < tokens.length && tokens[index + 1] == TokenType.Identifier)
+ if (tokens.canPeek() && tokens.peek() == TokenType.Identifier)
{
// enum long l = 4;
EnumMember m;
- m.type = tokens[index++].value;
- m.line = tokens[index].lineNumber;
- e.name = m.name = tokens[index].value;
+ m.type = tokens.front.value;
+ tokens.popFront();
+ m.line = tokens.front.lineNumber;
+ e.name = m.name = tokens.front.value;
e.members ~= m;
- skipBlockStatement(tokens, index);
+ tokens.skipBlockStatement();
return e;
}
- else if (index + 1 < tokens.length && tokens[index + 1] == TokenType.Assign)
+ else if (tokens.canPeek() && tokens.peek() == TokenType.Assign)
{
// enum m = "abcd";
- e.name = tokens[index].value;
+ e.name = tokens.front.value;
EnumMember m;
m.name = e.name;
- m.line = tokens[index].lineNumber;
- m.type = getTypeFromToken(tokens[index + 2]);
+ m.line = tokens.front.lineNumber;
+ m.type = getTypeFromToken(tokens.peek(2));
e.members ~= m;
- skipBlockStatement(tokens, index);
+ tokens.skipBlockStatement();
return e;
}
}
- if (isIdentifierOrType(tokens[index]))
- e.name = tokens[index++].value;
-
- if (tokens[index] == TokenType.Colon)
+ if (isIdentifierOrType(tokens.front))
{
- index++;
- if (!isIdentifierOrType(tokens[index]))
- skipBlockStatement(tokens, index);
+ e.name = tokens.front.value;
+ tokens.popFront();
+ }
+
+ if (tokens.front == TokenType.Colon)
+ {
+ tokens.popFront();
+ if (!isIdentifierOrType(tokens.front))
+ tokens.skipBlockStatement();
else
- enumType = tokens[index++].value;
+ {
+ enumType = tokens.front.value;
+ tokens.popFront();
+ }
}
enumBody:
-
- auto r = betweenBalancedBraces(tokens, index);
- for (size_t i = 0; i < r.length;)
- {
- EnumMember m;
- if (isIdentifierOrType(r[i]) && i + 1 < r.length && isIdentifierOrType(r[i + 1]))
- {
- m.line = r[i + 1].lineNumber;
- m.name = r[i + 1].value;
- m.type = r[i].value;
- }
- else if (isIdentifierOrType(r[i]) && i + 1 < r.length && r[i + 1] == TokenType.Assign)
- {
- if (enumType == null && i + 2 < r.length)
- m.type = getTypeFromToken(r[i + 2]);
- else
- m.type = enumType;
- m.line = r[i].lineNumber;
- m.name = r[i].value;
- }
- else
- {
- m.line = r[i].lineNumber;
- m.name = r[i].value;
- m.type = enumType == null ? "int" : enumType;
- }
- e.members ~= m;
- skipPastNext(r, TokenType.Comma, i);
- }
+//
+// auto r = tokens.betweenBalancedBraces();
+// while (!r.empty)
+// {
+// EnumMember m;
+// if (isIdentifierOrType(r.front) && i + 1 < r.length && isIdentifierOrType(r[i + 1]))
+// {
+// m.line = r[i + 1].lineNumber;
+// m.name = r[i + 1].value;
+// m.type = r.front.value;
+// }
+// else if (isIdentifierOrType(r.front) && i + 1 < r.length && r[i + 1] == TokenType.Assign)
+// {
+// if (enumType == null && i + 2 < r.length)
+// m.type = getTypeFromToken(r[i + 2]);
+// else
+// m.type = enumType;
+// m.line = r.front.lineNumber;
+// m.name = r.front.value;
+// }
+// else
+// {
+// m.line = r.front.lineNumber;
+// m.name = r.front.value;
+// m.type = enumType == null ? "int" : enumType;
+// }
+// e.members ~= m;
+// skipPastNext(r, TokenType.Comma, i);
+// }
return e;
}
@@ -599,11 +649,11 @@ enumBody:
/**
* Parses a function declaration
*/
-Function parseFunction(const Token[] tokens, ref size_t index, string type,
+Function parseFunction(TokenBuffer tokens, string type,
string name, uint line, string protection, string[] attributes)
in
{
- assert (tokens[index] == TokenType.LParen);
+ assert (tokens.front == TokenType.LParen);
}
body
{
@@ -613,20 +663,20 @@ body
f.line = line;
f.attributes.insertInPlace(f.attributes.length, attributes);
- Variable[] vars1 = parseParameters(tokens, index);
- if (index < tokens.length && tokens[index] == TokenType.LParen)
+ Variable[] vars1 = parseParameters(tokens);
+ if (!tokens.empty && tokens.front == TokenType.LParen)
{
f.templateParameters.insertInPlace(f.templateParameters.length,
map!("a.type")(vars1));
f.parameters.insertInPlace(f.parameters.length,
- parseParameters(tokens, index));
+ parseParameters(tokens));
}
else
f.parameters.insertInPlace(f.parameters.length, vars1);
- attributeLoop: while(index < tokens.length)
+ attributeLoop: while(!tokens.empty)
{
- switch (tokens[index].type)
+ switch (tokens.front.type)
{
case TokenType.Immutable:
case TokenType.Const:
@@ -634,62 +684,64 @@ body
case TokenType.Nothrow:
case TokenType.Final:
case TokenType.Override:
- f.attributes ~= tokens[index++].value;
+ f.attributes ~= tokens.front.value;
+ tokens.popFront();
break;
default:
break attributeLoop;
}
}
- if (index < tokens.length && tokens[index] == TokenType.If)
- f.constraint = parseConstraint(tokens, index);
+ if (!tokens.empty && tokens.front == TokenType.If)
+ f.constraint = parseConstraint(tokens);
- while (index < tokens.length &&
- (tokens[index] == TokenType.In || tokens[index] == TokenType.Out
- || tokens[index] == TokenType.Body))
+ while (!tokens.empty &&
+ (tokens.front == TokenType.In || tokens.front == TokenType.Out
+ || tokens.front == TokenType.Body))
{
- ++index;
- if (index < tokens.length && tokens[index] == TokenType.LParen
- && tokens[index - 1] == TokenType.Out)
+ tokens.popFront();
+ if (!tokens.empty && tokens.front == TokenType.LParen
+ && tokens.peek(-1) == TokenType.Out)
{
- tokens.skipParens(index);
+ tokens.skipParens();
}
- if (index < tokens.length && tokens[index] == TokenType.LBrace)
- tokens.skipBlockStatement(index);
+ if (!tokens.empty && tokens.front == TokenType.LBrace)
+ tokens.skipBlockStatement();
}
- if (index >= tokens.length)
+ if (tokens.empty)
return f;
- if (tokens[index] == TokenType.LBrace)
- tokens.skipBlockStatement(index);
- else if (tokens[index] == TokenType.Semicolon)
- ++index;
+ if (tokens.front == TokenType.LBrace)
+ tokens.skipBlockStatement();
+ else if (tokens.front == TokenType.Semicolon)
+ tokens.popFront();
return f;
}
-string parseConstraint(const Token[] tokens, ref size_t index)
+string parseConstraint(TokenBuffer tokens)
{
auto appender = appender!(string)();
- assert(tokens[index] == TokenType.If);
- appender.put(tokens[index++].value);
- assert(tokens[index] == TokenType.LParen);
- return "if " ~ parenContent(tokens, index);
+ assert(tokens.front == TokenType.If);
+ appender.put(tokens.front.value);
+ tokens.popFront();
+ assert(tokens.front == TokenType.LParen);
+ return "if " ~ tokens.parenContent();
}
-Variable[] parseParameters(const Token[] tokens, ref size_t index)
+Variable[] parseParameters(TokenBuffer tokens)
in
{
- assert (tokens[index] == TokenType.LParen);
+ assert (tokens.front == TokenType.LParen);
}
body
{
auto appender = appender!(Variable[])();
Variable v = new Variable;
- auto r = betweenBalancedParens(tokens, index);
+ auto r = betweenBalancedParens(tokens);
size_t i = 0;
- while (i < r.length)
+ while (!r.empty)
{
- switch(r[i].type)
+ switch(r.front.type)
{
case TokenType.Alias:
case TokenType.In:
@@ -701,15 +753,16 @@ body
case TokenType.Immutable:
case TokenType.Shared:
case TokenType.Inout:
- auto tmp = r[i++].value;
- if (r[i] == TokenType.LParen)
- v.type ~= tmp ~ parenContent(r, i);
+ auto tmp = r.front.value;
+ r.popFront();
+ if (r.front == TokenType.LParen)
+ v.type ~= tmp ~ parenContent(r);
else
v.attributes ~= tmp;
break;
case TokenType.Colon:
i++;
- r.skipPastNext(TokenType.Comma, i);
+ r.skipPastNext(TokenType.Comma);
appender.put(v);
v = new Variable;
break;
@@ -721,21 +774,22 @@ body
default:
if (v.type.empty())
{
- v.type = r.parseTypeDeclaration(i);
- if (i >= r.length)
+ v.type = r.parseTypeDeclaration();
+ if (r.empty)
appender.put(v);
}
else
{
- v.line = r[i].lineNumber;
- v.name = r[i++].value;
+ v.line = r.front.lineNumber;
+ v.name = r.front.value;
+ r.popFront();
appender.put(v);
- if (i < r.length && r[i] == TokenType.Vararg)
+ if (!r.empty && r.front == TokenType.Vararg)
{
v.type ~= " ...";
}
v = new Variable;
- r.skipPastNext(TokenType.Comma, i);
+ r.skipPastNext(TokenType.Comma);
}
break;
}
@@ -743,23 +797,23 @@ body
return appender.data;
}
-string[] parseBaseClassList(const Token[] tokens, ref size_t index)
+string[] parseBaseClassList(TokenBuffer tokens)
in
{
- assert(tokens[index] == TokenType.Colon);
+ assert(tokens.front == TokenType.Colon);
}
body
{
auto appender = appender!(string[])();
- ++index;
- while (index < tokens.length)
+ tokens.popFront();
+ while (!tokens.empty)
{
- if (tokens[index] == TokenType.Identifier)
+ if (tokens.front == TokenType.Identifier)
{
- string base = parseTypeDeclaration(tokens, index);
+ string base = parseTypeDeclaration(tokens);
appender.put(base);
- if (tokens[index] == TokenType.Comma)
- ++index;
+ if (tokens.front == TokenType.Comma)
+ tokens.popFront();
else
break;
}
@@ -769,117 +823,118 @@ body
return appender.data;
}
-void parseStructBody(const Token[] tokens, ref size_t index, Struct st)
+void parseStructBody(TokenBuffer tokens, Struct st)
{
- st.bodyStart = tokens[index].startIndex;
- Module m = parseModule(betweenBalancedBraces(tokens, index));
- st.bodyEnd = tokens[index - 1].startIndex;
+ st.bodyStart = tokens.front.startIndex;
+ Module m = parseModule(tokens.betweenBalancedBraces());
+ st.bodyEnd = tokens.peek(-1).startIndex;
st.functions.insertInPlace(0, m.functions);
st.variables.insertInPlace(0, m.variables);
st.aliases.insertInPlace(0, m.aliases);
}
-Struct parseStructOrUnion(const Token[] tokens, ref size_t index, string protection,
+Struct parseStructOrUnion(TokenBuffer tokens, string protection,
string[] attributes)
{
Struct s = new Struct;
- s.line = tokens[index].lineNumber;
+ s.line = tokens.front.lineNumber;
s.attributes = attributes;
s.protection = protection;
- s.name = tokens[index++].value;
- if (tokens[index] == TokenType.LParen)
+ s.name = tokens.front.value;
+ tokens.popFront();
+ if (tokens.front == TokenType.LParen)
s.templateParameters.insertInPlace(s.templateParameters.length,
- map!("a.type")(parseParameters(tokens, index)));
+ map!("a.type")(parseParameters(tokens)));
- if (index >= tokens.length) return s;
+ if (tokens.empty) return s;
- if (tokens[index] == TokenType.If)
- s.constraint = parseConstraint(tokens, index);
+ if (tokens.front == TokenType.If)
+ s.constraint = parseConstraint(tokens);
- if (index >= tokens.length) return s;
+ if (tokens.empty) return s;
- if (tokens[index] == TokenType.LBrace)
- parseStructBody(tokens, index, s);
+ if (tokens.front == TokenType.LBrace)
+ parseStructBody(tokens, s);
else
- tokens.skipBlockStatement(index);
+ tokens.skipBlockStatement();
return s;
}
-Struct parseStruct(const Token[] tokens, ref size_t index, string protection,
+Struct parseStruct(TokenBuffer tokens, string protection,
string[] attributes)
in
{
- assert(tokens[index] == TokenType.Struct);
+ assert(tokens.front == TokenType.Struct);
}
body
{
- return parseStructOrUnion(tokens, ++index, protection, attributes);
+ return parseStructOrUnion(tokens, protection, attributes);
}
-Struct parseUnion(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Struct parseUnion(TokenBuffer tokens, string protection, string[] attributes)
in
{
- assert(tokens[index] == TokenType.Union);
+ assert(tokens.front == TokenType.Union);
}
body
{
- return parseStructOrUnion(tokens, ++index, protection, attributes);
+ tokens.popFront();
+ return parseStructOrUnion(tokens, protection, attributes);
}
-Inherits parseInherits(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Inherits parseInherits(TokenBuffer tokens, string protection, string[] attributes)
{
auto i = new Inherits;
- i.line = tokens[index].lineNumber;
- i.name = tokens[index++].value;
+ i.line = tokens.front.lineNumber;
+ i.name = tokens.front.value;
+ tokens.popFront();
i.protection = protection;
i.attributes.insertInPlace(i.attributes.length, attributes);
- if (tokens[index] == TokenType.LParen)
+ if (tokens.front == TokenType.LParen)
i.templateParameters.insertInPlace(i.templateParameters.length,
- map!("a.type")(parseParameters(tokens, index)));
+ map!("a.type")(parseParameters(tokens)));
- if (index >= tokens.length) return i;
+ if (tokens.empty) return i;
- if (tokens[index] == TokenType.If)
- i.constraint = parseConstraint(tokens, index);
+ if (tokens.front == TokenType.If)
+ i.constraint = parseConstraint(tokens);
- if (index >= tokens.length) return i;
+ if (tokens.empty) return i;
- if (tokens[index] == TokenType.Colon)
- i.baseClasses = parseBaseClassList(tokens, index);
+ if (tokens.front == TokenType.Colon)
+ i.baseClasses = parseBaseClassList(tokens);
- if (index >= tokens.length) return i;
+ if (tokens.empty) return i;
- if (tokens[index] == TokenType.LBrace)
- parseStructBody(tokens, index, i);
+ if (tokens.front == TokenType.LBrace)
+ parseStructBody(tokens, i);
else
- tokens.skipBlockStatement(index);
+ tokens.skipBlockStatement();
return i;
}
-Inherits parseInterface(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Inherits parseInterface(TokenBuffer tokens, string protection, string[] attributes)
in
{
- assert (tokens[index] == TokenType.Interface);
+ assert (tokens.front == TokenType.Interface);
}
body
{
- return parseInherits(tokens, ++index, protection, attributes);
+ tokens.popFront();
+ return parseInherits(tokens, protection, attributes);
}
-Inherits parseClass(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Inherits parseClass(TokenBuffer tokens, string protection, string[] attributes)
in
{
- assert(tokens[index] == TokenType.Class);
+ assert(tokens.front == TokenType.Class);
}
body
{
- return parseInherits(tokens, ++index, protection, attributes);
+ tokens.popFront();
+ return parseInherits(tokens, protection, attributes);
}
@@ -889,24 +944,23 @@ body
* but there seems to be no example of this being used, nor has the compiler
* accepted any of my attempts to create one. Therefore, it's not supported here
*/
-Alias parseAlias(const Token[] tokens, ref size_t index, string protection,
- string[] attributes)
+Alias parseAlias(TokenBuffer tokens, string protection, string[] attributes)
in
{
- assert(tokens[index] == TokenType.Alias);
+ assert(tokens.front == TokenType.Alias);
}
body
{
- index++;
+ tokens.popFront();
Alias a = new Alias;
- a.aliasedType = parseTypeDeclaration(tokens, index);
+ a.aliasedType = parseTypeDeclaration(tokens);
a.attributes = attributes;
a.protection = protection;
- if (tokens[index] == TokenType.Identifier)
+ if (tokens.front == TokenType.Identifier)
{
- a.name = tokens[index].value;
- a.line = tokens[index].lineNumber;
- skipBlockStatement(tokens, index);
+ a.name = tokens.front.value;
+ a.line = tokens.front.lineNumber;
+ skipBlockStatement(tokens);
}
else
return null;
diff --git a/entities.d b/std/d/entities.d
similarity index 99%
rename from entities.d
rename to std/d/entities.d
index 8b276cb..c4e8bc3 100644
--- a/entities.d
+++ b/std/d/entities.d
@@ -1,7 +1,15 @@
-// Copyright Brian Schott (Sir Alaran) 2012.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)module entities;
+// Written in the D programming language
+
+/**
+ * Contains listing of named entities for the D lexer.
+ *
+ * Copyright: Brian Schott 2013
+ * License: Boost License 1.0.
+ * Authors: Brian Schott
+ * Source: $(PHOBOSSRC std/d/_entities.d)
+ */
+
+module std.d.entities;
/**
* Generated from $(LINK http://www.w3.org/TR/html5/entities.json)
diff --git a/tokenizer.d b/std/d/lexer.d
similarity index 56%
rename from tokenizer.d
rename to std/d/lexer.d
index 3dea8ad..4793911 100644
--- a/tokenizer.d
+++ b/std/d/lexer.d
@@ -1,26 +1,658 @@
-// Copyright Brian Schott (Sir Alaran) 2012.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
+// Written in the D programming language
-module tokenizer;
+/**
+ * This module contains a range-based lexer for the D programming language.
+ *
+ * Copyright: Brian Schott 2013
+ * License: Boost License 1.0.
+ * Authors: Brian Schott
+ * Source: $(PHOBOSSRC std/d/_lexer.d)
+ */
+
+module std.d.lexer;
import std.range;
-import std.file;
import std.traits;
import std.algorithm;
import std.conv;
import std.uni;
-import std.stdio;
import std.ascii;
-import std.format;
import std.exception;
+import std.d.entities;
-import langutils;
-import codegen;
-import entities;
+public:
-pure bool isNewline(R)(R range)
+/**
+ * Represents a D token
+ */
+struct Token
+{
+ /// The token type.
+ TokenType type;
+
+ /// The representation of the token in the original source code.
+ string value;
+
+ /// The number of the line the token is on.
+ uint lineNumber;
+
+ /// The character index of the start of the token in the original text.
+ uint startIndex;
+
+ /**
+ * Check to see if the token is of the same type and has the same string
+ * representation as the given token.
+ */
+ bool opEquals(ref const(Token) other) const
+ {
+ return other.type == type && other.value == value;
+ }
+
+ /**
+ * Checks to see if the token's string representation is equal to the given
+ * string.
+ */
+ bool opEquals(string value) const { return this.value == value; }
+
+ /**
+ * Checks to see if the token is of the given type.
+ */
+ bool opEquals(TokenType type) const { return this.type == type; }
+
+ /**
+ * Comparison operator orders tokens by start index.
+ */
+ int opCmp(size_t i) const
+ {
+ if (startIndex < i) return -1;
+ if (startIndex > i) return 1;
+ return 0;
+ }
+}
+
+/**
+ * Configure the behavior of the byToken() function
+ */
+enum IterationStyle
+{
+ /// Only include code, not whitespace or comments
+ CodeOnly = 0,
+ /// Includes comments
+ IncludeComments = 0b01,
+ /// Includes whitespace
+ IncludeWhitespace = 0b10,
+ /// Include everything
+ Everything = IncludeComments | IncludeWhitespace
+}
+
+/**
+ * Configuration of the string lexing style
+ */
+enum StringStyle : uint
+{
+ /**
+ * Escape sequences will be replaced with their equivalent characters,
+ * enclosing quote characters will not be included. Useful for creating a
+ * compiler or interpreter.
+ */
+ Default = 0b0000,
+
+ /**
+ * Escape sequences will not be processed. An escaped quote character will
+ * not terminate string lexing, but it will not be replaced with the quote
+ * character in the token.
+ */
+ NotEscaped = 0b0001,
+
+ /**
+ * Strings will include their opening and closing quote characters as well
+ * as any prefixes or suffixes $(LPAREN)e.g.: $(D_STRING "abcde"w) will
+ * include the $(D_STRING 'w') character as well as the opening and closing
+ * quotes$(RPAREN)
+ */
+ IncludeQuotes = 0x0010,
+
+ /**
+ * Strings will be read exactly as they appeared in the source, including
+ * their opening and closing quote characters. Useful for syntax
+ * highlighting.
+ */
+ Source = NotEscaped | IncludeQuotes,
+}
+
+/**
+ * Iterate over the given range of characters by D tokens.
+ * Params:
+ * range = the range of characters
+ * iterationStyle = See IterationStyle
+ * stringStyle = see StringStyle
+ * Returns:
+ * an input range of tokens
+ */
+TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
+ const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
+{
+ auto r = new TokenRange!(R)(range);
+ r.stringStyle = stringStyle;
+ r.iterStyle = iterationStyle;
+ r.lineNumber = 1;
+ r.popFront();
+ return r;
+}
+
+/**
+ * Range of tokens
+ */
+class TokenRange(R) : InputRange!(Token)
+{
+ this(ref R range)
+ {
+ this.range = range;
+ }
+
+ /**
+ * Returns: true if the range is empty
+ */
+ override bool empty() const @property
+ {
+ return _empty;
+ }
+
+ /**
+ * Returns: the current token
+ */
+ override Token front() const @property
+ {
+ enforce(!_empty, "Cannot call front() on empty token range");
+ return current;
+ }
+
+ /**
+ * Returns the current token and then removes it from the range
+ */
+ override Token moveFront()
+ {
+ auto r = front();
+ popFront();
+ return r;
+ }
+
+ override int opApply(int delegate(Token) dg)
+ {
+ int result = 0;
+ while (!empty)
+ {
+ result = dg(front);
+ if (result)
+ break;
+ popFront();
+ }
+ return result;
+ }
+
+ override int opApply(int delegate(size_t, Token) dg)
+ {
+ int result = 0;
+ int i = 0;
+ while (!empty)
+ {
+ result = dg(i, front);
+ if (result)
+ break;
+ popFront();
+ }
+ return result;
+ }
+
+ /**
+ * Removes the current token from the range
+ */
+ override void popFront()
+ {
+ if (range.empty)
+ {
+ _empty = true;
+ return;
+ }
+
+ current = Token.init;
+ current.lineNumber = lineNumber;
+ current.startIndex = index;
+
+ while (std.uni.isWhite(range.front))
+ {
+ if (iterStyle == IterationStyle.Everything)
+ {
+ current = lexWhitespace(range, index, lineNumber);
+ return;
+ }
+ else
+ lexWhitespace(range, index, lineNumber);
+ }
+ outer: switch (range.front)
+ {
+ mixin(generateCaseTrie(
+ "=", "TokenType.Assign",
+ "&", "TokenType.BitAnd",
+ "&=", "TokenType.BitAndEquals",
+ "|", "TokenType.BitOr",
+ "|=", "TokenType.BitOrEquals",
+ "~=", "TokenType.CatEquals",
+ ":", "TokenType.Colon",
+ ",", "TokenType.Comma",
+ "$", "TokenType.Dollar",
+ ".", "TokenType.Dot",
+ "==", "TokenType.Equals",
+ "=>", "TokenType.GoesTo",
+ ">", "TokenType.Greater",
+ ">=", "TokenType.GreaterEqual",
+ "#", "TokenType.Hash",
+ "&&", "TokenType.LogicAnd",
+ "{", "TokenType.LBrace",
+ "[", "TokenType.LBracket",
+ "<", "TokenType.Less",
+ "<=", "TokenType.LessEqual",
+ "<>=", "TokenType.LessEqualGreater",
+ "<>", "TokenType.LessOrGreater",
+ "||", "TokenType.LogicOr",
+ "(", "TokenType.LParen",
+ "-", "TokenType.Minus",
+ "-=", "TokenType.MinusEquals",
+ "%", "TokenType.Mod",
+ "%=", "TokenType.ModEquals",
+ "*=", "TokenType.MulEquals",
+ "!", "TokenType.Not",
+ "!=", "TokenType.NotEquals",
+ "!>", "TokenType.NotGreater",
+ "!>=", "TokenType.NotGreaterEqual",
+ "!<", "TokenType.NotLess",
+ "!<=", "TokenType.NotLessEqual",
+ "!<>", "TokenType.NotLessEqualGreater",
+ "+", "TokenType.Plus",
+ "+=", "TokenType.PlusEquals",
+ "^^", "TokenType.Pow",
+ "^^=", "TokenType.PowEquals",
+ "}", "TokenType.RBrace",
+ "]", "TokenType.RBracket",
+ ")", "TokenType.RParen",
+ ";", "TokenType.Semicolon",
+ "<<", "TokenType.ShiftLeft",
+ "<<=", "TokenType.ShiftLeftEqual",
+ ">>", "TokenType.ShiftRight",
+ ">>=", "TokenType.ShiftRightEqual",
+ "..", "TokenType.Slice",
+ "*", "TokenType.Star",
+ "?", "TokenType.Ternary",
+ "~", "TokenType.Tilde",
+ "--", "TokenType.Decrement",
+ "!<>=", "TokenType.Unordered",
+ ">>>", "TokenType.UnsignedShiftRight",
+ ">>>=", "TokenType.UnsignedShiftRightEqual",
+ "++", "TokenType.Increment",
+ "...", "TokenType.Vararg",
+ "^", "TokenType.Xor",
+ "^=", "TokenType.XorEquals",
+ "@", "TokenType.At",
+ ));
+ case '0': .. case '9':
+ current = lexNumber(range, index, lineNumber);
+ break;
+ case '\'':
+ case '"':
+ current = lexString(range, index, lineNumber, stringStyle);
+ break;
+ case '`':
+ current = lexString(range, index, lineNumber, stringStyle);
+ break;
+ case 'q':
+ auto r = range.save;
+ r.popFront();
+ if (!r.isEoF() && r.front == '{')
+ {
+ current = lexTokenString(range, index, lineNumber, stringStyle);
+ break;
+ }
+ else if (!r.isEoF() && r.front == '"')
+ {
+ current = lexDelimitedString(range, index, lineNumber,
+ stringStyle);
+ break;
+ }
+ else
+ goto default;
+ case '/':
+ auto r = range.save();
+ r.popFront();
+ if (r.isEoF())
+ {
+ current.type = TokenType.Div;
+ current.value = "/";
+ range.popFront();
+ ++index;
+ break;
+ }
+ switch (r.front)
+ {
+ case '/':
+ case '*':
+ case '+':
+ current = lexComment(range, index, lineNumber);
+ break outer;
+ case '=':
+ current.type = TokenType.DivEquals;
+ current.value = "/=";
+ range.popFront();
+ range.popFront();
+ index += 2;
+ break outer;
+ default:
+ current.type = TokenType.Div;
+ current.value = "/";
+ ++index;
+ range.popFront();
+ break outer;
+ }
+ case 'r':
+ auto r = range.save();
+ r.popFront();
+ if (!r.isEoF() && r.front == '"')
+ {
+ current = lexString(range, index, lineNumber, stringStyle);
+ break;
+ }
+ else
+ goto default;
+ case 'x':
+ auto r = range.save();
+ r.popFront();
+ if (!r.isEoF() && r.front == '"')
+ {
+ current = lexHexString(range, index, lineNumber);
+ break;
+ }
+ else
+ goto default;
+ default:
+ auto app = appender!(ElementType!(R)[])();
+ while(!range.isEoF() && !isSeparating(range.front))
+ {
+ app.put(range.front);
+ range.popFront();
+ ++index;
+ }
+ current.value = to!string(app.data);
+ current.type = lookupTokenType(current.value);
+ break;
+ }
+ }
+
+private:
+ Token current;
+ uint lineNumber;
+ uint index;
+ R range;
+ bool _empty;
+ IterationStyle iterStyle;
+ StringStyle stringStyle;
+}
+
+/**
+ * Listing of all the tokens in the D language.
+ *
+ * Token types are arranged so that it is easy to group tokens while iterating
+ * over them. For example:
+ * ---
+ * assert(TokenType.Increment < TokenType.OPERATORS_END);
+ * assert(TokenType.Increment > TokenType.OPERATORS_BEGIN);
+ * ---
+ * The non-token values are documented below:
+ *
+ * $(BOOKTABLE ,
+ * $(TR $(TH Begin) $(TH End) $(TH Content) $(TH Examples))
+ * $(TR $(TD OPERATORS_BEGIN) $(TD OPERATORS_END) $(TD operators) $(TD +, -, <<=))
+ * $(TR $(TD TYPES_BEGIN) $(TD TYPES_END) $(TD types) $(TD bool, char, double))
+ * $(TR $(TD KEYWORDS_BEGIN) $(TD KEYWORDS_END) $(TD keywords) $(TD class, if, assert))
+ * $(TR $(TD ATTRIBUTES_BEGIN) $(TD ATTRIBUTES_END) $(TD attributes) $(TD override synchronized, __gshared))
+ * $(TR $(TD PROTECTION_BEGIN) $(TD PROTECTION_END) $(TD protection) $(TD public, protected))
+ * $(TR $(TD CONSTANTS_BEGIN) $(TD CONSTANTS_END) $(TD compile-time constants) $(TD __FILE__, __TIME__))
+ * $(TR $(TD LITERALS_BEGIN) $(TD LITERALS_END) $(TD string and numeric literals) $(TD "str", 123))
+ * $(TR $(TD NUMBERS_BEGIN) $(TD NUMBERS_END) $(TD numeric literals) $(TD 0x123p+9, 0b0110))
+ * $(TR $(TD STRINGS_BEGIN) $(TD STRINGS_END) $(TD string literals) $(TD `123`c, q{tokens;}, "abcde"))
+ * $(TR $(TD MISC_BEGIN) $(TD MISC_END) $(TD anything else) $(TD whitespace, comments, identifiers))
+ * )
+ * Note that several of the above ranges overlap.
+ */
+enum TokenType: uint
+{
+ // Operators
+ OPERATORS_BEGIN, ///
+ Assign, /// =
+ At, /// @
+ BitAnd, /// &
+ BitAndEquals, /// &=
+ BitOr, /// |
+ BitOrEquals, /// |=
+ CatEquals, /// ~=
+ Colon, /// :
+ Comma, /// ,
+ Decrement, /// --
+ Div, /// /
+ DivEquals, /// /=
+ Dollar, /// $
+ Dot, /// .
+ Equals, /// ==
+ GoesTo, /// =>
+ Greater, /// >
+ GreaterEqual, /// >=
+ Hash, /// #
+ Increment, /// ++
+ LBrace, /// {
+ LBracket, /// [
+ Less, /// <
+ LessEqual, /// <=
+ LessEqualGreater, /// <>=
+ LessOrGreater, /// <>
+ LogicAnd, /// &&
+ LogicOr, /// ||
+ LParen, /// $(LPAREN)
+ Minus, /// -
+ MinusEquals, /// -=
+ Mod, /// %
+ ModEquals, /// %=
+ MulEquals, /// *=
+ Not, /// !
+ NotEquals, /// !=
+ NotGreater, /// !>
+ NotGreaterEqual, /// !>=
+ NotLess, /// !<
+ NotLessEqual, /// !<=
+ NotLessEqualGreater, /// !<>
+ Plus, /// +
+ PlusEquals, /// +=
+ Pow, /// ^^
+ PowEquals, /// ^^=
+ RBrace, /// }
+ RBracket, /// ]
+ RParen, /// $(RPAREN)
+ Semicolon, /// ;
+ ShiftLeft, /// <<
+ ShiftLeftEqual, /// <<=
+ ShiftRight, /// >>
+ ShiftRightEqual, /// >>=
+ Slice, /// ..
+ Star, /// *
+ Ternary, /// ?
+ Tilde, /// ~
+ Unordered, /// !<>=
+ UnsignedShiftRight, /// >>>
+ UnsignedShiftRightEqual, /// >>>=
+ Vararg, /// ...
+ Xor, /// ^
+ XorEquals, /// ^=
+ OPERATORS_END, ///
+
+ // Types
+ TYPES_BEGIN, ///
+ Bool, /// bool,
+ Byte, /// byte,
+ Cdouble, /// cdouble,
+ Cent, /// cent,
+ Cfloat, /// cfloat,
+ Char, /// char,
+ Creal, /// creal,
+ Dchar, /// dchar,
+ Double, /// double,
+ DString, /// dstring
+ Float, /// float,
+ Function, /// function,
+ Idouble, /// idouble,
+ Ifloat, /// ifloat,
+ Int, /// int,
+ Ireal, /// ireal,
+ Long, /// long,
+ Real, /// real,
+ Short, /// short,
+ String, /// string
+ Ubyte, /// ubyte,
+ Ucent, /// ucent,
+ Uint, /// uint,
+ Ulong, /// ulong,
+ Ushort, /// ushort,
+ Void, /// void,
+ Wchar, /// wchar,
+ WString, /// wstring
+ TYPES_END, ///
+
+ Template, /// template,
+
+ // Keywords
+ KEYWORDS_BEGIN, ///
+ ATTRIBUTES_BEGIN, ///
+ Align, /// align,
+ Deprecated, /// deprecated,
+ Extern, /// extern,
+ Pragma, /// pragma,
+ PROTECTION_BEGIN, ///
+ Export, /// export,
+ Package, /// package,
+ Private, /// private,
+ Protected, /// protected,
+ Public, /// public,
+ PROTECTION_END, ///
+ Abstract, /// abstract,
+ AtDisable, /// @disable
+ Auto, /// auto,
+ Const, /// const,
+ Final, /// final
+ Gshared, /// __gshared,
+ Immutable, /// immutable,
+ Inout, /// inout,
+ Scope, /// scope,
+ Shared, /// shared,
+ Static, /// static,
+ Synchronized, /// synchronized,
+ ATTRIBUTES_END, ///
+ Alias, /// alias,
+ Asm, /// asm,
+ Assert, /// assert,
+ Body, /// body,
+ Break, /// break,
+ Case, /// case,
+ Cast, /// cast,
+ Catch, /// catch,
+ Class, /// class,
+ Continue, /// continue,
+ Debug, /// debug,
+ Default, /// default,
+ Delegate, /// delegate,
+ Delete, /// delete,
+ Do, /// do,
+ Else, /// else,
+ Enum, /// enum,
+ False, /// false,
+ Finally, /// finally,
+ Foreach, /// foreach,
+ Foreach_reverse, /// foreach_reverse,
+ For, /// for,
+ Goto, /// goto,
+ If, /// if ,
+ Import, /// import,
+ In, /// in,
+ Interface, /// interface,
+ Invariant, /// invariant,
+ Is, /// is,
+ Lazy, /// lazy,
+ Macro, /// macro,
+ Mixin, /// mixin,
+ Module, /// module,
+ New, /// new,
+ Nothrow, /// nothrow,
+ Null, /// null,
+ Out, /// out,
+ Override, /// override,
+ Pure, /// pure,
+ Ref, /// ref,
+ Return, /// return,
+ Struct, /// struct,
+ Super, /// super,
+ Switch, /// switch ,
+ This, /// this,
+ Throw, /// throw,
+ True, /// true,
+ Try, /// try,
+ Typedef, /// typedef,
+ Typeid, /// typeid,
+ Typeof, /// typeof,
+ Union, /// union,
+ Unittest, /// unittest,
+ Version, /// version,
+ Volatile, /// volatile,
+ While, /// while ,
+ With, /// with,
+ KEYWORDS_END, ///
+
+ // Constants
+ CONSTANTS_BEGIN, ///
+ File, /// __FILE__,
+ Line, /// __LINE__,
+ Thread, /// __thread,
+ Traits, /// __traits,
+ CONSTANTS_END, ///
+
+ // Misc
+ MISC_BEGIN, ///
+ Comment, /// /** comment */ or // comment or ///comment
+ Identifier, /// anything else
+ ScriptLine, /// Line at the beginning of source file that starts from #!
+ Whitespace, /// whitespace
+ MISC_END, ///
+
+ // Literals
+ LITERALS_BEGIN, ///
+ NUMBERS_BEGIN, ///
+ DoubleLiteral, /// 123.456
+ FloatLiteral, /// 123.456f or 0x123_45p-af
+ IDoubleLiteral, /// 123.456i
+ IFloatLiteral, /// 123.456fi
+ IntLiteral, /// 123 or 0b1101010101
+ LongLiteral, /// 123L
+ RealLiteral, /// 123.456L
+ IRealLiteral, /// 123.456Li
+ UnsignedIntLiteral, /// 123u
+ UnsignedLongLiteral, /// 123uL
+ NUMBERS_END, ///
+ STRINGS_BEGIN, ///
+ DStringLiteral, /// "32-bit character string"d
+ StringLiteral, /// "a string"
+ WStringLiteral, /// "16-bit character string"w
+ STRINGS_END, ///
+ LITERALS_END, ///
+}
+
+// Implementation details follow
+private:
+
+/// Returns: true if the range's front is '\n' or '\r'.
+/// Note: does not check for end-of-input first; callers must ensure the
+/// range is non-empty (see the isEoF checks at every call site).
+private pure bool isNewline(R)(R range)
{
 return range.front == '\n' || range.front == '\r';
}
@@ -30,7 +662,8 @@ pure bool isEoF(R)(R range)
return range.empty || range.front == 0 || range.front == 0x1a;
}
-C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index) if (isSomeChar!C && isForwardRange!R)
+C[] popNewline(R, C = ElementType!R)(ref R range, ref uint index)
+ if (isSomeChar!C && isForwardRange!R)
{
C[] chars;
if (range.front == '\r')
@@ -56,11 +689,8 @@ unittest
assert (s == "test");
}
-/**
- * Returns:
- */
-Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index, ref uint lineNumber)
- if (isForwardRange!R && isSomeChar!C)
+Token lexWhitespace(R, C = ElementType!R)(ref R range, ref uint index,
+ ref uint lineNumber) if (isForwardRange!R && isSomeChar!C)
{
Token t;
t.type = TokenType.Whitespace;
@@ -97,15 +727,6 @@ unittest
assert (lineNum == 3);
}
-/**
- * Increments endIndex until it indexes a character directly after a comment
- * Params:
- * inputString = the source code to examine
- * endIndex = an index into inputString at the second character of a
- * comment, i.e. points at the second slash in a // comment.
- * lineNumber = the line number that corresponds to endIndex
- * Returns: The comment
- */
Token lexComment(R, C = ElementType!R)(ref R input, ref uint index, ref uint lineNumber)
if (isSomeChar!C && isForwardRange!R)
in
@@ -252,9 +873,6 @@ unittest
assert (comment == "");
}
-/**
- * Pops up to upTo hex chars from the input range and returns them as a string
- */
string popDigitChars(R, C = ElementType!R, alias isInterestingDigit)(ref R input, ref uint index,
uint upTo) if (isSomeChar!C && isForwardRange!R)
{
@@ -628,6 +1246,250 @@ unittest
assert (lexString(g, i, l) == "a\nb");
}
+/**
+ * Lexes a D delimited string literal: q"(...)", q"[...]", q"{...}", q"<...>",
+ * or the heredoc form q"IDENT ... IDENT".
+ * Params:
+ *     input = the source range, positioned at the leading 'q'
+ *     index = current index into the source; advanced as characters are consumed
+ *     lineNumber = current line number; incremented for embedded newlines
+ *     stringStyle = controls whether quotes/suffixes are kept in the value
+ * Returns: a StringLiteral token (W/DStringLiteral if a w/d suffix follows)
+ */
+Token lexDelimitedString(R)(ref R input, ref uint index,
+ ref uint lineNumber, const StringStyle stringStyle = StringStyle.Default)
+in
+{
+ assert(input.front == 'q');
+}
+body
+{
+ auto app = appender!(ElementType!R[])();
+ Token t;
+ t.startIndex = index;
+ t.lineNumber = lineNumber;
+ t.type = TokenType.StringLiteral;
+
+ input.popFront(); // q
+ input.popFront(); // "
+ index += 2;
+ if (stringStyle & StringStyle.IncludeQuotes)
+ {
+ app.put('q');
+ app.put('"');
+ }
+
+ // A bracket-style delimiter uses matched open/close characters and nests;
+ // any other leading character starts a heredoc identifier.
+ bool heredoc;
+ ElementType!R open;
+ ElementType!R close;
+
+ switch (input.front)
+ {
+ case '[': open = '['; close = ']'; break;
+ case '{': open = '{'; close = '}'; break;
+ case '(': open = '('; close = ')'; break;
+ case '<': open = '<'; close = '>'; break;
+ default: heredoc = true; break;
+ }
+
+ if (heredoc)
+ {
+ // Read the heredoc identifier up to the first whitespace character.
+ auto hereOpen = appender!(ElementType!(R)[])();
+ while (!input.isEoF() && !std.uni.isWhite(input.front))
+ {
+ hereOpen.put(input.front());
+ input.popFront();
+ }
+ // NOTE(review): the newline after the identifier is consumed but not
+ // appended to app, so it never appears in the value — confirm intended.
+ if (input.isNewline())
+ {
+ ++lineNumber;
+ input.popNewline(index);
+ }
+// else
+// this is an error
+ // Accumulate lines until a closing quote preceded by the identifier.
+ while (!input.isEoF())
+ {
+ if (isNewline(input))
+ {
+ ++lineNumber;
+ app.put(input.popNewline(index));
+ }
+ else if (input.front == '"' && app.data.endsWith(hereOpen.data))
+ {
+ app.put('"');
+ ++index;
+ input.popFront();
+ if (stringStyle & StringStyle.IncludeQuotes)
+ t.value = to!string(app.data);
+ else
+ // Strip the trailing identifier and the closing quote.
+ t.value = to!string(app.data[0 .. app.data.length - hereOpen.data.length - 1]);
+ break;
+ }
+ else
+ {
+ app.put(input.front);
+ ++index;
+ input.popFront();
+ }
+ }
+ }
+ else
+ {
+ if (stringStyle & StringStyle.IncludeQuotes)
+ app.put(input.front);
+ input.popFront();
+ // Track nesting so q"{a{b}c}" keeps inner delimiters intact.
+ int depth = 1;
+ while (depth > 0 && !input.isEoF())
+ {
+ if (isNewline(input))
+ app.put(popNewline(input, index));
+ else
+ {
+ if (input.front == close)
+ {
+ --depth;
+ if (depth == 0)
+ {
+ if (stringStyle & StringStyle.IncludeQuotes)
+ {
+ app.put(close);
+ app.put('"');
+ }
+ // NOTE(review): index is not advanced for these two
+ // popFront() calls (closing delimiter and quote) — confirm.
+ input.popFront();
+ input.popFront();
+ break;
+ }
+ }
+ else if (input.front == open)
+ ++depth;
+ app.put(input.front);
+ input.popFront();
+ ++index;
+ }
+ }
+ }
+ // Consume an optional c/w/d suffix; w and d change the literal's type.
+ if (!input.isEoF())
+ {
+ switch (input.front)
+ {
+ case 'w':
+ t.type = TokenType.WStringLiteral;
+ goto case 'c';
+ case 'd':
+ t.type = TokenType.DStringLiteral;
+ goto case 'c';
+ case 'c':
+ if (stringStyle & StringStyle.IncludeQuotes)
+ app.put(input.front);
+ input.popFront();
+ ++index;
+ break;
+ default:
+ break;
+ }
+ }
+ // The heredoc path may have set t.value already; otherwise use app's data.
+ if (t.value is null)
+ t.value = to!string(app.data);
+ return t;
+}
+
+// Tests for lexDelimitedString: bracket-delimited, heredoc, and Source-style.
+// The assertions rely on Token comparing equal to both its string value and
+// its TokenType (opEquals overloads declared elsewhere in this module).
+unittest
+{
+ uint i;
+ uint l;
+ auto a = `q"{abc{}de}"`;
+ auto ar = lexDelimitedString(a, i, l);
+ assert (ar == "abc{}de");
+ assert (ar == TokenType.StringLiteral);
+
+ auto b = "q\"abcde\n123\nabcde\"w";
+ auto br = lexDelimitedString(b, i, l);
+ assert (br == "123\n");
+ assert (br == TokenType.WStringLiteral);
+
+ auto c = `q"[ ]");`;
+ auto cr = lexDelimitedString(c, i, l, StringStyle.Source);
+ assert (cr == `q"[ ]"`);
+ assert (cr == TokenType.StringLiteral);
+}
+
+/**
+ * Lexes a D token string literal of the form q{ ... }, whose contents must be
+ * valid D tokens. Uses a nested byToken range over the remaining input and
+ * counts brace tokens to find the matching closing brace.
+ * Params:
+ *     input = the source range, positioned at the leading 'q'
+ *     index = current index into the source; updated from the nested lexer
+ *     lineNumber = current line number
+ *     stringStyle = controls whether the q{ } delimiters are kept in the value
+ * Returns: a StringLiteral token (W/DStringLiteral if a w/d suffix follows)
+ */
+Token lexTokenString(R)(ref R input, ref uint index, ref uint lineNumber,
+ const StringStyle stringStyle = StringStyle.Default)
+in
+{
+ assert (input.front == 'q');
+}
+body
+{
+ Token t;
+ t.startIndex = index;
+ t.type = TokenType.StringLiteral;
+ t.lineNumber = lineNumber;
+ auto app = appender!(ElementType!(R)[])();
+ input.popFront(); // q
+ input.popFront(); // {
+ index += 2;
+ if (stringStyle & StringStyle.IncludeQuotes)
+ {
+ app.put('q');
+ app.put('{');
+ }
+ // Lex the interior with Everything/Source so whitespace and comments are
+ // preserved verbatim in the accumulated value.
+ auto r = byToken(input, IterationStyle.Everything, StringStyle.Source);
+ r.index = index;
+ int depth = 1;
+ while (!r.empty)
+ {
+ if (r.front == TokenType.LBrace)
+ {
+ ++depth;
+ }
+ else if (r.front == TokenType.RBrace)
+ {
+ --depth;
+ if (depth <= 0)
+ {
+ if (stringStyle & StringStyle.IncludeQuotes)
+ app.put('}');
+ r.popFront();
+ break;
+ }
+ }
+ app.put(r.front.value);
+ r.popFront();
+ }
+
+ // Advance input past the characters consumed by the nested lexer.
+ // NOTE(review): assumes the concatenated token values account for every
+ // character consumed (IterationStyle.Everything should guarantee this) —
+ // confirm against byToken's behavior.
+ auto n = app.data.length - (stringStyle & StringStyle.IncludeQuotes ? 2 : 0);
+ input.popFrontN(n);
+ if (!input.isEoF())
+ {
+ switch (input.front)
+ {
+ case 'w':
+ t.type = TokenType.WStringLiteral;
+ goto case 'c';
+ case 'd':
+ t.type = TokenType.DStringLiteral;
+ goto case 'c';
+ case 'c':
+ if (stringStyle & StringStyle.IncludeQuotes)
+ app.put(input.front);
+ input.popFront();
+ ++index;
+ break;
+ default:
+ break;
+ }
+ }
+ t.value = to!string(app.data);
+ // NOTE(review): this overwrites the ++index performed for a c/w/d suffix
+ // above — confirm whether the suffix character should be counted here.
+ index = r.index;
+ return t;
+}
+
+// Tests for lexTokenString: default (contents only) and Source (verbatim,
+// including the q{ } delimiters) styles. As above, Token compares equal to
+// both its string value and its TokenType.
+unittest
+{
+ uint i;
+ uint l;
+ auto a = "q{import std.stdio;}";
+ auto ar = lexTokenString(a, i, l);
+ assert (ar == TokenType.StringLiteral);
+ assert (ar == "import std.stdio;");
+
+ auto b = `q{writeln("hello world");}`;
+ auto br = lexTokenString(b, i, l, StringStyle.Source);
+ assert (br == TokenType.StringLiteral);
+ assert (br == `q{writeln("hello world");}`);
+}
+
Token lexNumber(R)(ref R input, ref uint index, const uint lineNumber)
in
{
@@ -635,7 +1497,7 @@ in
}
body
{
- auto app = appender!(char[])();
+ auto app = appender!(ElementType!(R)[])();
// hex and binary can start with zero, anything else is decimal
if (input.front != '0')
return lexDecimal(input, index, lineNumber, app);
@@ -672,8 +1534,8 @@ unittest
assert (lexNumber(a, i, l) == "0");
}
-Token lexBinary(R)(ref R input, ref uint index, const uint lineNumber,
- ref typeof(appender!(char[])()) app)
+Token lexBinary(R, A)(ref R input, ref uint index, const uint lineNumber,
+ ref A app)
{
Token token;
token.lineNumber = lineNumber;
@@ -777,8 +1639,8 @@ unittest
}
-Token lexDecimal(R)(ref R input, ref uint index, const uint lineNumber,
- ref typeof(appender!(char[])()) app)
+Token lexDecimal(R, A)(ref R input, ref uint index, const uint lineNumber,
+ ref A app)
{
bool lexingSuffix = false;
bool isLong = false;
@@ -940,7 +1802,8 @@ Token lexDecimal(R)(ref R input, ref uint index, const uint lineNumber,
}
-unittest {
+unittest
+{
uint i;
uint l;
auto a = "55e-4";
@@ -1044,8 +1907,8 @@ unittest {
assert (xr == TokenType.DoubleLiteral);
}
-Token lexHex(R)(ref R input, ref uint index, const uint lineNumber,
- ref typeof(appender!(char[])()) app)
+Token lexHex(R, A)(ref R input, ref uint index, const uint lineNumber,
+ ref A app)
{
bool isLong = false;
bool isUnsigned = false;
@@ -1208,10 +2071,6 @@ unittest
assert (pr == TokenType.DoubleLiteral);
}
-/**
- * Returns: true if ch marks the ending of one token and the beginning of
- * another, false otherwise
- */
pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
{
switch (ch)
@@ -1230,260 +2089,278 @@ pure nothrow bool isSeparating(C)(C ch) if (isSomeChar!C)
}
}
-/**
- * Configure the tokenize() function
- */
-enum IterationStyle
+pure nothrow TokenType lookupTokenType(const string input)
{
- /// Only include code, not whitespace or comments
- CodeOnly = 0,
- /// Includes comments
- IncludeComments = 0b01,
- /// Includes whitespace
- IncludeWhitespace = 0b10,
- /// Include everything
- Everything = IncludeComments | IncludeWhitespace
-}
-
-/**
- * Configuration of the token lexing style
- */
-enum StringStyle : uint
-{
- /// Escape sequences will be replaced with their equivalent characters.
- /// Quote characters will not be included
- Default = 0b0000,
-
- /// Escape sequences will not be processed
- NotEscaped = 0b0001,
-
- /// Strings will include their opening and closing quote characters as well
- /// as any prefixes or suffixes (e.g.: "abcde"w will include the 'w'
- /// character)
- IncludeQuotes = 0x0010,
-
- /// Strings will be read exactly as they appeared in the source, including
- /// their opening and closing quote characters. Useful for syntax highlighting.
- Source = NotEscaped | IncludeQuotes,
-}
-
-TokenRange!(R) byToken(R)(R range, const IterationStyle iterationStyle = IterationStyle.CodeOnly,
- const StringStyle stringStyle = StringStyle.Default) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
-{
- auto r = TokenRange!(R)(range);
- r.stringStyle = stringStyle;
- r.iterStyle = iterationStyle;
- r.lineNumber = 1;
- r.popFront();
- return r;
-}
-
-struct TokenRange(R) if (isForwardRange!(R) && isSomeChar!(ElementType!(R)))
-{
- this(ref R range)
+ switch(input.length)
{
- this.range = range;
- }
-
- bool empty() @property
- {
- return _empty;
- }
-
- Token front() const @property
- {
- enforce(!_empty, "Cannot call popFront() on empty token range");
- return current;
- }
-
- Token popFront()
- {
- if (range.isEoF())
+ case 2:
+ switch (input)
{
- _empty = true;
- return current;
+ case "do": return TokenType.Do;
+ case "if": return TokenType.If;
+ case "in": return TokenType.In;
+ case "is": return TokenType.Is;
+ default: break;
}
-
- Token c = current;
- current = Token.init;
- current.lineNumber = lineNumber;
- current.startIndex = index;
-
- while (std.uni.isWhite(range.front))
+ break;
+ case 3:
+ switch (input)
{
- if (iterStyle == IterationStyle.Everything)
+ case "asm": return TokenType.Asm;
+ case "for": return TokenType.For;
+ case "int": return TokenType.Int;
+ case "new": return TokenType.New;
+ case "out": return TokenType.Out;
+ case "ref": return TokenType.Ref;
+ case "try": return TokenType.Try;
+ default: break;
+ }
+ break;
+ case 4:
+ switch (input)
+ {
+ case "auto": return TokenType.Auto;
+ case "body": return TokenType.Body;
+ case "bool": return TokenType.Bool;
+ case "byte": return TokenType.Byte;
+ case "case": return TokenType.Case;
+ case "cast": return TokenType.Cast;
+ case "cent": return TokenType.Cent;
+ case "char": return TokenType.Char;
+ case "else": return TokenType.Else;
+ case "enum": return TokenType.Enum;
+ case "goto": return TokenType.Goto;
+ case "lazy": return TokenType.Lazy;
+ case "long": return TokenType.Long;
+ case "null": return TokenType.Null;
+ case "pure": return TokenType.Pure;
+ case "real": return TokenType.Real;
+ case "this": return TokenType.This;
+ case "true": return TokenType.True;
+ case "uint": return TokenType.Uint;
+ case "void": return TokenType.Void;
+ case "with": return TokenType.With;
+ default: break;
+ }
+ break;
+ case 5:
+ switch (input)
+ {
+ case "alias": return TokenType.Alias;
+ case "align": return TokenType.Align;
+ case "break": return TokenType.Break;
+ case "catch": return TokenType.Catch;
+ case "class": return TokenType.Class;
+ case "const": return TokenType.Const;
+ case "creal": return TokenType.Creal;
+ case "dchar": return TokenType.Dchar;
+ case "debug": return TokenType.Debug;
+ case "false": return TokenType.False;
+ case "final": return TokenType.Final;
+ case "float": return TokenType.Float;
+ case "inout": return TokenType.Inout;
+ case "ireal": return TokenType.Ireal;
+ case "macro": return TokenType.Macro;
+ case "mixin": return TokenType.Mixin;
+ case "scope": return TokenType.Scope;
+ case "short": return TokenType.Short;
+ case "super": return TokenType.Super;
+ case "throw": return TokenType.Throw;
+ case "ubyte": return TokenType.Ubyte;
+ case "ucent": return TokenType.Ucent;
+ case "ulong": return TokenType.Ulong;
+ case "union": return TokenType.Union;
+ case "wchar": return TokenType.Wchar;
+ case "while": return TokenType.While;
+ default: break;
+ }
+ break;
+ case 6:
+ switch (input)
+ {
+ case "assert": return TokenType.Assert;
+ case "cfloat": return TokenType.Cfloat;
+ case "delete": return TokenType.Delete;
+ case "double": return TokenType.Double;
+ case "export": return TokenType.Export;
+ case "extern": return TokenType.Extern;
+ case "ifloat": return TokenType.Ifloat;
+ case "import": return TokenType.Import;
+ case "module": return TokenType.Module;
+ case "pragma": return TokenType.Pragma;
+ case "public": return TokenType.Public;
+ case "return": return TokenType.Return;
+ case "shared": return TokenType.Shared;
+ case "static": return TokenType.Static;
+ case "string": return TokenType.String;
+ case "struct": return TokenType.Struct;
+ case "switch": return TokenType.Switch;
+ case "typeid": return TokenType.Typeid;
+ case "typeof": return TokenType.Typeof;
+ case "ushort": return TokenType.Ushort;
+ default: break;
+ }
+ break;
+ case 7:
+ switch (input)
+ {
+ case "cdouble": return TokenType.Cdouble;
+ case "default": return TokenType.Default;
+ case "dstring": return TokenType.DString;
+ case "finally": return TokenType.Finally;
+ case "foreach": return TokenType.Foreach;
+ case "idouble": return TokenType.Idouble;
+ case "nothrow": return TokenType.Nothrow;
+ case "package": return TokenType.Package;
+ case "private": return TokenType.Private;
+ case "typedef": return TokenType.Typedef;
+ case "version": return TokenType.Version;
+ case "wstring": return TokenType.WString;
+ default: break;
+ }
+ break;
+ case 8:
+ switch (input)
+ {
+ case "override": return TokenType.Override;
+ case "continue": return TokenType.Continue;
+ case "__LINE__": return TokenType.Line;
+ case "template": return TokenType.Template;
+ case "abstract": return TokenType.Abstract;
+ case "__thread": return TokenType.Thread;
+ case "__traits": return TokenType.Traits;
+ case "volatile": return TokenType.Volatile;
+ case "delegate": return TokenType.Delegate;
+ case "function": return TokenType.Function;
+ case "unittest": return TokenType.Unittest;
+ case "__FILE__": return TokenType.File;
+ default: break;
+ }
+ break;
+ case 9:
+ switch (input)
+ {
+ case "__gshared": return TokenType.Gshared;
+ case "immutable": return TokenType.Immutable;
+ case "interface": return TokenType.Interface;
+ case "invariant": return TokenType.Invariant;
+ case "protected": return TokenType.Protected;
+ default: break;
+ }
+ break;
+ case 10:
+ if (input == "deprecated")
+ return TokenType.Deprecated;
+ break;
+ case 11:
+ if (input == "synchronized")
+ return TokenType.Synchronized;
+ break;
+ case 13:
+ if (input == "foreach_reverse")
+ return TokenType.Foreach_reverse;
+ break;
+ default: break;
+ }
+ return TokenType.Identifier;
+}
+
+class Trie(K, V) if (isInputRange!K): TrieNode!(K, V)
+{
+ /**
+ * Adds the given value to the trie with the given key
+ */
+ void add(K key, V value) pure
+ {
+ TrieNode!(K,V) current = this;
+ foreach(keyPart; key)
+ {
+ if ((keyPart in current.children) is null)
{
- current = lexWhitespace(range, index, lineNumber);
- return c;
+ auto node = new TrieNode!(K, V);
+ current.children[keyPart] = node;
+ current = node;
}
else
- lexWhitespace(range, index, lineNumber);
+ current = current.children[keyPart];
}
- outer: switch (range.front)
- {
- mixin(generateCaseTrie(
- "=", "TokenType.Assign",
- "&", "TokenType.BitAnd",
- "&=", "TokenType.BitAndEquals",
- "|", "TokenType.BitOr",
- "|=", "TokenType.BitOrEquals",
- "~=", "TokenType.CatEquals",
- ":", "TokenType.Colon",
- ",", "TokenType.Comma",
- "$", "TokenType.Dollar",
- ".", "TokenType.Dot",
- "==", "TokenType.Equals",
- "=>", "TokenType.GoesTo",
- ">", "TokenType.Greater",
- ">=", "TokenType.GreaterEqual",
- "#", "TokenType.Hash",
- "&&", "TokenType.LogicAnd",
- "{", "TokenType.LBrace",
- "[", "TokenType.LBracket",
- "<", "TokenType.Less",
- "<=", "TokenType.LessEqual",
- "<>=", "TokenType.LessEqualGreater",
- "<>", "TokenType.LessOrGreater",
- "||", "TokenType.LogicOr",
- "(", "TokenType.LParen",
- "-", "TokenType.Minus",
- "-=", "TokenType.MinusEquals",
- "%", "TokenType.Mod",
- "%=", "TokenType.ModEquals",
- "*=", "TokenType.MulEquals",
- "!", "TokenType.Not",
- "!=", "TokenType.NotEquals",
- "!>", "TokenType.NotGreater",
- "!>=", "TokenType.NotGreaterEqual",
- "!<", "TokenType.NotLess",
- "!<=", "TokenType.NotLessEqual",
- "!<>", "TokenType.NotLessEqualGreater",
- "+", "TokenType.Plus",
- "+=", "TokenType.PlusEquals",
- "^^", "TokenType.Pow",
- "^^=", "TokenType.PowEquals",
- "}", "TokenType.RBrace",
- "]", "TokenType.RBracket",
- ")", "TokenType.RParen",
- ";", "TokenType.Semicolon",
- "<<", "TokenType.ShiftLeft",
- "<<=", "TokenType.ShiftLeftEqual",
- ">>", "TokenType.ShiftRight",
- ">>=", "TokenType.ShiftRightEqual",
- "..", "TokenType.Slice",
- "*", "TokenType.Star",
- "?", "TokenType.Ternary",
- "~", "TokenType.Tilde",
- "--", "TokenType.Decrement",
- "!<>=", "TokenType.Unordered",
- ">>>", "TokenType.UnsignedShiftRight",
- ">>>=", "TokenType.UnsignedShiftRightEqual",
- "++", "TokenType.Increment",
- "...", "TokenType.Vararg",
- "^", "TokenType.Xor",
- "^=", "TokenType.XorEquals",
- "@", "TokenType.At",
- ));
- case '0': .. case '9':
- current = lexNumber(range, index, lineNumber);
- break;
- case '\'':
- case '"':
- current = lexString(range, index, lineNumber, stringStyle);
- break;
- case '`':
- current = lexString(range, index, lineNumber, stringStyle);
- break;
- case 'q':
- /+auto r = range.save;
- r.popFront();
- if (!r.isEoF() && r.front == '{')
- {
- writeln("ParseTokenString");
- break;
- }
- else+/
- goto default;
- case '/':
- auto r = range.save();
- r.popFront();
- if (r.isEoF())
- {
- current.type = TokenType.Div;
- current.value = "/";
- range.popFront();
- ++index;
- break;
- }
- switch (r.front)
- {
- case '/':
- case '*':
- case '+':
- current = lexComment(range, index, lineNumber);
- break outer;
- case '=':
- current.type = TokenType.DivEquals;
- current.value = "/=";
- range.popFront();
- range.popFront();
- index += 2;
- break outer;
- default:
- current.type = TokenType.Div;
- current.value = "/";
- ++index;
- range.popFront();
- break outer;
- }
- case 'r':
- auto r = range.save();
- r.popFront();
- if (!r.isEoF() && r.front == '"')
- {
- current = lexString(range, index, lineNumber, stringStyle);
- break;
- }
- else
- goto default;
- case 'x':
- auto r = range.save();
- r.popFront();
- if (!r.isEoF() && r.front == '"')
- {
- current = lexHexString(range, index, lineNumber);
- break;
- }
- else
- goto default;
- default:
- auto app = appender!(ElementType!(R)[])();
- while(!range.isEoF() && !isSeparating(range.front))
- {
- app.put(range.front);
- range.popFront();
- ++index;
- }
- current.value = to!string(app.data);
- current.type = lookupTokenTypeOptimized(current.value);
- break;
- }
- return c;
+ current.value = value;
}
-
-private:
- Token current;
- uint lineNumber;
- uint index;
- R range;
- bool _empty;
- IterationStyle iterStyle;
- StringStyle stringStyle;
}
-unittest
+class TrieNode(K, V) if (isInputRange!K)
{
- auto c = `r"d:\path\foo.bat"`;
- foreach (t; byToken(c, IterationStyle.CodeOnly, StringStyle.Source))
- writeln(t.type, ": {", t.value, "}");
+ V value;
+ TrieNode!(K,V)[ElementType!K] children;
+}
+
+/**
+ * Recursively generates D case-statement source from a trie node. Each child
+ * key character becomes a 'case' label that appends the character to
+ * current.value, advances the range, and either recurses into a nested
+ * switch (for multi-character operators) or assigns the stored TokenType
+ * expression at a leaf.
+ * Params:
+ *     node = the trie node whose children are emitted as cases
+ *     indentString = indentation prefix for the generated source
+ * Returns: D source text intended for use via mixin() inside a switch
+ */
+string printCaseStatements(K, V)(TrieNode!(K,V) node, string indentString)
+{
+ string caseStatement = "";
+ foreach(dchar k, TrieNode!(K,V) v; node.children)
+ {
+ caseStatement ~= indentString;
+ caseStatement ~= "case '";
+ caseStatement ~= k;
+ caseStatement ~= "':\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\tcurrent.value ~= '";
+ caseStatement ~= k;
+ caseStatement ~= "';\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t++index;\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\trange.popFront();\n";
+ if (v.children.length > 0)
+ {
+ // Interior node: emit an EoF guard, then a nested switch for the
+ // longer operators that share this prefix.
+ // NOTE(review): node.children[k] is the same node as v here; if an
+ // interior node has no value of its own, this emits
+ // "current.type = ;" — confirm every prefix is itself a token.
+ caseStatement ~= indentString;
+ caseStatement ~= "\tif (range.isEoF())\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t{\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t\tcurrent.type = " ~ node.children[k].value;
+ caseStatement ~= ";\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t\tbreak;\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t}\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\tswitch (range.front)\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t{\n";
+ caseStatement ~= printCaseStatements(v, indentString ~ "\t");
+ caseStatement ~= indentString;
+ caseStatement ~= "\tdefault:\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t\tcurrent.type = ";
+ caseStatement ~= v.value;
+ caseStatement ~= ";\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t\tbreak;\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\t}\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\tbreak;\n";
+ }
+ else
+ {
+ // Leaf node: the accumulated characters form a complete token.
+ caseStatement ~= indentString;
+ caseStatement ~= "\tcurrent.type = ";
+ caseStatement ~= v.value;
+ caseStatement ~= ";\n";
+ caseStatement ~= indentString;
+ caseStatement ~= "\tbreak;\n";
+ }
+ }
+ return caseStatement;
+}
+
+/**
+ * Builds a trie from alternating (token string, TokenType expression) pairs
+ * and renders it as D case-statement source via printCaseStatements, for use
+ * with mixin() inside the lexer's main switch.
+ * Params:
+ *     args = pairs of (characters to match, source text of the resulting
+ *            TokenType); must contain an even number of elements
+ * Returns: generated D source for the case statements
+ */
+string generateCaseTrie(string[] args ...)
+{
+ // An odd count would make args[i + 1] below read out of bounds.
+ assert (args.length % 2 == 0, "generateCaseTrie requires (string, type) pairs");
+ auto t = new Trie!(string, string);
+ // size_t avoids the signed/unsigned comparison with args.length.
+ for (size_t i = 0; i < args.length; i += 2)
+ {
+ t.add(args[i], args[i + 1]);
+ }
+ return printCaseStatements(t, "");
+}