diff --git a/.gitmodules b/.gitmodules
old mode 100755
new mode 100644
index e69de29..c34a4b6
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "datapicked"]
+ path = datapicked
+ url = ./datapicked/
diff --git a/astprinter.d b/astprinter.d
index ec3921a..ae2e855 100644
--- a/astprinter.d
+++ b/astprinter.d
@@ -469,9 +469,9 @@ class XMLPrinter : ASTVisitor
output.writeln("
]");
- foreach (Token t; tokens)
+ while (!tokens.empty)
{
+ auto t = tokens.front;
+ tokens.popFront();
if (isBasicType(t.type))
writeSpan("type", str(t.type));
else if (isKeyword(t.type))
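Aside (illustrative only, not part of this patch): the hunk above swaps a foreach over the token container for explicit input-range iteration, since the new lexer yields a range that is consumed as it is read. A minimal sketch of the pattern, assuming only that tokens is some input range whose elements carry a type field:

    void consume(R)(R tokens)
    {
        while (!tokens.empty)
        {
            auto t = tokens.front;   // inspect the current token
            tokens.popFront();       // then advance; the range cannot be replayed
            // ... handle t.type here ...
        }
    }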
diff --git a/main.d b/main.d
index 33cfdf6..def1c6d 100644
--- a/main.d
+++ b/main.d
@@ -17,13 +17,14 @@ import std.stdio;
import std.range;
import stdx.d.lexer;
import stdx.d.parser;
+import dpick.buffer.buffer;
import highlighter;
-import stats;
-import ctags;
-import astprinter;
-import imports;
-import outliner;
+//import stats;
+//import ctags;
+//import astprinter;
+//import imports;
+//import outliner;
int main(string[] args)
{
@@ -91,69 +92,69 @@ int main(string[] args)
{
bool usingStdin = args.length == 1;
ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]);
- highlighter.highlight(byToken!(typeof(bytes), false, false)(bytes),
- args.length == 1 ? "stdin" : args[1]);
+ auto tokens = DLexer!(ubyte[])(bytes);
+ highlighter.highlight(tokens, args.length == 1 ? "stdin" : args[1]);
return 0;
}
- else if (ctags)
- {
- stdout.printCtags(expandArgs(args, recursive));
- }
- else
- {
- bool usingStdin = args.length == 1;
- if (sloc || tokenCount)
- {
- if (usingStdin)
- {
- auto tokens = byToken!(ubyte[], false, false)(readStdin());
- if (tokenCount)
- printTokenCount(stdout, "stdin", tokens);
- else
- printLineCount(stdout, "stdin", tokens);
- }
- else
- {
- ulong count;
- foreach (f; expandArgs(args, recursive))
- {
- auto tokens = byToken!(ubyte[])(readFile(f));
- if (tokenCount)
- count += printTokenCount(stdout, f, tokens);
- else
- count += printLineCount(stdout, f, tokens);
- }
- writefln("total:\t%d", count);
- }
- }
- else if (syntaxCheck)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- }
- else if (imports)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto visitor = new ImportPrinter;
- visitor.visit(mod);
- }
- else if (ast)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto printer = new XMLPrinter;
- printer.output = stdout;
- printer.visit(mod);
- }
- else if (outline)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto outliner = new Outliner(stdout);
- outliner.visit(mod);
- }
- }
+// else if (ctags)
+// {
+// stdout.printCtags(expandArgs(args, recursive));
+// }
+// else
+// {
+// bool usingStdin = args.length == 1;
+// if (sloc || tokenCount)
+// {
+// if (usingStdin)
+// {
+// auto tokens = byToken!(ubyte[], false, false)(readStdin());
+// if (tokenCount)
+// printTokenCount(stdout, "stdin", tokens);
+// else
+// printLineCount(stdout, "stdin", tokens);
+// }
+// else
+// {
+// ulong count;
+// foreach (f; expandArgs(args, recursive))
+// {
+// auto tokens = byToken!(ubyte[])(readFile(f));
+// if (tokenCount)
+// count += printTokenCount(stdout, f, tokens);
+// else
+// count += printLineCount(stdout, f, tokens);
+// }
+// writefln("total:\t%d", count);
+// }
+// }
+// else if (syntaxCheck)
+// {
+// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
+// parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
+// }
+// else if (imports)
+// {
+// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
+// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
+// auto visitor = new ImportPrinter;
+// visitor.visit(mod);
+// }
+// else if (ast)
+// {
+// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
+// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
+// auto printer = new XMLPrinter;
+// printer.output = stdout;
+// printer.visit(mod);
+// }
+// else if (outline)
+// {
+// auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
+// auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
+// auto outliner = new Outliner(stdout);
+// outliner.visit(mod);
+// }
+// }
return 0;
}
diff --git a/main.html b/main.html
deleted file mode 100644
index 6f0976f..0000000
--- a/main.html
+++ /dev/null
@@ -1,276 +0,0 @@
-
-
-
-
-
-main.d
-
-
-
-
-// Copyright Brian Schott (Sir Alaran) 2012.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-
-module main;
-
-import std.algorithm;
-import std.array;
-import std.conv;
-import std.file;
-import std.getopt;
-import std.parallelism;
-import std.path;
-import std.regex;
-import std.stdio;
-import std.range;
-import stdx.d.lexer;
-import stdx.d.parser;
-
-import highlighter;
-import stats;
-import ctags;
-import astprinter;
-import imports;
-import outliner;
-
-int main(string[] args)
-{
- bool sloc;
- bool highlight;
- bool ctags;
- bool recursive;
- bool format;
- bool help;
- bool tokenCount;
- bool syntaxCheck;
- bool ast;
- bool imports;
- bool muffin;
- bool outline;
-
- try
- {
- getopt(args, "sloc|l", &sloc, "highlight", &highlight,
- "ctags|c", &ctags, "recursive|r|R", &recursive, "help|h", &help,
- "tokenCount|t", &tokenCount, "syntaxCheck|s", &syntaxCheck,
- "ast|xml", &ast, "imports|i", &imports, "outline|o", &outline,
- "muffinButton", &muffin);
- }
- catch (Exception e)
- {
- stderr.writeln(e.msg);
- }
-
- if (muffin)
- {
- stdout.writeln(
-` ___________
- __(#*O 0** @%*)__
- _(%*o#*O%*0 #O#%##@)_
- (*#@%#o*@ #o%O*%@ #o #)
- \=====================/
- |I|I|I|I|I|I|I|I|I|I|
- |I|I|I|I|I|I|I|I|I|I|
- |I|I|I|I|I|I|I|I|I|I|
- |I|I|I|I|I|I|I|I|I|I|`);
- return 0;
- }
-
- if (help)
- {
- printHelp(args[0]);
- return 0;
- }
-
- auto optionCount = count!"a"([sloc, highlight, ctags, tokenCount,
- syntaxCheck, ast, imports, outline]);
- if (optionCount > 1)
- {
- stderr.writeln("Too many options specified");
- return 1;
- }
- else if (optionCount < 1)
- {
- printHelp(args[0]);
- return 1;
- }
-
- if (highlight)
- {
- bool usingStdin = args.length == 1;
- ubyte[] bytes = usingStdin ? readStdin() : readFile(args[1]);
- highlighter.highlight(byToken!(typeof(bytes), false, false)(bytes),
- args.length == 1 ? "stdin" : args[1]);
- return 0;
- }
- else if (ctags)
- {
- stdout.printCtags(expandArgs(args, recursive));
- }
- else
- {
- bool usingStdin = args.length == 1;
- if (sloc || tokenCount)
- {
- if (usingStdin)
- {
- auto tokens = byToken!(ubyte[], false, false)(readStdin());
- if (tokenCount)
- printTokenCount(stdout, "stdin", tokens);
- else
- printLineCount(stdout, "stdin", tokens);
- }
- else
- {
- ulong count;
- foreach (f; expandArgs(args, recursive))
- {
- auto tokens = byToken!(ubyte[])(readFile(f));
- if (tokenCount)
- count += printTokenCount(stdout, f, tokens);
- else
- count += printLineCount(stdout, f, tokens);
- }
- writefln("total:\t%d", count);
- }
- }
- else if (syntaxCheck)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- }
- else if (imports)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto visitor = new ImportPrinter;
- visitor.visit(mod);
- }
- else if (ast)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto printer = new XMLPrinter;
- printer.output = stdout;
- printer.visit(mod);
- }
- else if (outline)
- {
- auto tokens = byToken(usingStdin ? readStdin() : readFile(args[1]));
- auto mod = parseModule(tokens.array(), usingStdin ? "stdin" : args[1]);
- auto outliner = new Outliner(stdout);
- outliner.visit(mod);
- }
- }
- return 0;
-}
-
-string[] expandArgs(string[] args, bool recursive)
-{
- if (recursive)
- {
- string[] rVal;
- foreach (arg; args[1 ..$])
- {
- if (isFile(arg) && arg.endsWith(`.d`) || arg.endsWith(`.di`))
- rVal ~= arg;
- else foreach (item; dirEntries(arg, SpanMode.breadth).map!(a => a.name))
- {
- if (isFile(item) && (item.endsWith(`.d`) || item.endsWith(`.di`)))
- rVal ~= item;
- else
- continue;
- }
- }
- return rVal;
- }
- else
- return args[1 .. $];
-}
-
-ubyte[] readStdin()
-{
- auto sourceCode = appender!(ubyte[])();
- ubyte[4096] buf;
- while (true)
- {
- auto b = stdin.rawRead(buf);
- if (b.length == 0)
- break;
- sourceCode.put(b);
- }
- return sourceCode.data;
-}
-
-ubyte[] readFile(string fileName)
-{
- if (!exists(fileName))
- {
- stderr.writefln("%s does not exist", fileName);
- return [];
- }
- File f = File(fileName);
- ubyte[] sourceCode = uninitializedArray!(ubyte[])(to!size_t(f.size));
- f.rawRead(sourceCode);
- return sourceCode;
-}
-
-void printHelp(string programName)
-{
- stderr.writefln(
-`
- Usage: %s options
-
-options:
- --help | -h
- Prints this help message
-
- --sloc | -l [sourceFiles]
- Prints the number of logical lines of code in the given
- source files. If no files are specified, input is read from stdin.
-
- --tokenCount | t [sourceFiles]
- Prints the number of tokens in the given source files. If no files are
- specified, input is read from stdin.
-
- --highlight [sourceFile] - Syntax-highlight the given source file. The
- resulting HTML will be written to standard output. If no files are
- specified, input is read from stdin.
-
- --imports | -i [sourceFile]
- Prints modules imported by the given source file. If no files are
- specified, input is read from stdin.
-
- --syntaxCheck | -s [sourceFile]
- Lexes and parses sourceFile, printing the line and column number of any
- syntax errors to stdout. One error or warning is printed per line.
- If no files are specified, input is read from stdin.
-
- --ctags | -c sourceFile
- Generates ctags information from the given source code file. Note that
- ctags information requires a filename, so stdin cannot be used in place
- of a filename.
-
- --ast | --xml sourceFile
- Generates an XML representation of the source files abstract syntax
- tree. If no files are specified, input is read from stdin.
-
- --recursive | -R | -r
- When used with --ctags, --tokenCount, or --sloc, dscanner will produce
- ctags output for all .d and .di files contained within the given
- directories and its sub-directories.`,
- programName);
-}
-
-
diff --git a/stdx/d/ast.d b/stdx/d/ast.d
index 691ce97..ba948d0 100644
--- a/stdx/d/ast.d
+++ b/stdx/d/ast.d
@@ -1203,13 +1203,13 @@ class ForStatement : ASTNode
public:
override void accept(ASTVisitor visitor)
{
- mixin (visitIfNotNull!(declarationOrStatement, test, increment,
- statementNoCaseNoDefault));
+ mixin (visitIfNotNull!(initialization, test, increment,
+ declarationOrStatement));
}
- /** */ DeclarationOrStatement declarationOrStatement;
+ /** */ DeclarationOrStatement initialization;
/** */ ExpressionStatement test;
/** */ Expression increment;
- /** */ StatementNoCaseNoDefault statementNoCaseNoDefault;
+ /** */ DeclarationOrStatement declarationOrStatement;
/** */ size_t startIndex;
}
@@ -2760,11 +2760,11 @@ class WhileStatement : ASTNode
public:
override void accept(ASTVisitor visitor)
{
- mixin (visitIfNotNull!(expression, statementNoCaseNoDefault));
+ mixin (visitIfNotNull!(expression, declarationOrStatement));
}
/** */ Expression expression;
- /** */ StatementNoCaseNoDefault statementNoCaseNoDefault;
+ /** */ DeclarationOrStatement declarationOrStatement;
/** */ size_t startIndex;
}
diff --git a/stdx/d/lexer.d b/stdx/d/lexer.d
index f185d37..ed0bf56 100644
--- a/stdx/d/lexer.d
+++ b/stdx/d/lexer.d
@@ -17,7 +17,7 @@ private enum staticTokens = [
private enum pseudoTokens = [
"\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6",
- "7", "8", "9", "#", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!",
+ "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!",
"\u2028", "\u2029"
];
@@ -57,24 +57,24 @@ public template tok(string token)
}
public alias stdx.lexer.TokenStructure!(IdType) Token;
-public auto byToken(R, bool skipComments = true, bool skipWhitespace = true)(R range)
-{
- pure nothrow bool isNotComment(const Token t) { return t.type != tok!"comment"; }
- pure nothrow bool isNotWhitespace(const Token t) { return t.type != tok!"whitespace"; }
- pure nothrow bool isNotEither(const Token t) { return t.type != tok!"whitespace" && t.type != tok!"comment"; }
-
- static if (skipComments)
- {
- static if (skipWhitespace)
- return DLexer!(R)(range).filter!isNotEither;
- else
- return DLexer!(R)(range).filter!isNotComment;
- }
- else static if (skipWhitespace)
- return DLexer!(R)(range).filter!isNotWhitespace;
- else
- return DLexer!(R)(range);
-}
+//public auto byToken(R, bool skipComments = true, bool skipWhitespace = true)(R range)
+//{
+// pure nothrow bool isNotComment(const Token t) { return t.type != tok!"comment"; }
+// pure nothrow bool isNotWhitespace(const Token t) { return t.type != tok!"whitespace"; }
+// pure nothrow bool isNotEither(const Token t) { return t.type != tok!"whitespace" && t.type != tok!"comment"; }
+// return new DLexer!(R)(range);
+// static if (skipComments)
+// {
+// static if (skipWhitespace)
+// return filter!isNotEither(tokens);
+// else
+// return filter!isNotComment(tokens);
+// }
+// else static if (skipWhitespace)
+// return filter!isNotWhitespace(tokens);
+// else
+// return tokens;
+//}
public bool isBasicType(IdType type) nothrow pure @safe
{
@@ -322,45 +322,50 @@ public struct DLexer(R)
{
import std.conv;
import core.vararg;
-
- mixin Lexer!(R, IdType, Token, isSeparating, lexIdentifier, staticTokens,
- dynamicTokens, pseudoTokens, possibleDefaultTokens);
+ import dpick.buffer.buffer;
+
+ private enum pseudoTokenHandlers = [
+ "\"", "lexStringLiteral",
+ "`", "lexWysiwygString",
+ "//", "lexSlashSlashComment",
+ "/*", "lexSlashStarComment",
+ "/+", "lexSlashPlusComment",
+ ".", "lexDot",
+ "'", "lexCharacterLiteral",
+ "0", "lexNumber",
+ "1", "lexNumber",
+ "2", "lexNumber",
+ "3", "lexNumber",
+ "4", "lexNumber",
+ "5", "lexNumber",
+ "6", "lexNumber",
+ "7", "lexNumber",
+ "8", "lexNumber",
+ "9", "lexNumber",
+ "q\"", "lexDelimitedString",
+ "q{", "lexTokenString",
+ "r\"", "lexWysiwygString",
+ "x\"", "lexHexString",
+ " ", "lexWhitespace",
+ "\t", "lexWhitespace",
+ "\r", "lexWhitespace",
+ "\n", "lexWhitespace",
+ "\u2028", "lexLongNewline",
+ "\u2029", "lexLongNewline",
+ "#!", "lexScriptLine"
+ ];
+
+ mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens,
+ dynamicTokens, pseudoTokens, pseudoTokenHandlers, possibleDefaultTokens);
+
+ private alias typeof(range).Mark Mark;
this(R range)
{
- registerPostProcess!"\""(&lexStringLiteral);
- registerPostProcess!"`"(&lexWysiwygString);
- registerPostProcess!"//"(&lexSlashSlashComment);
- registerPostProcess!"/*"(&lexSlashStarComment);
- registerPostProcess!"/+"(&lexSlashPlusComment);
- registerPostProcess!"."(&lexDot);
- registerPostProcess!"'"(&lexCharacterLiteral);
- registerPostProcess!"0"(&lexNumber);
- registerPostProcess!"1"(&lexNumber);
- registerPostProcess!"2"(&lexNumber);
- registerPostProcess!"3"(&lexNumber);
- registerPostProcess!"4"(&lexNumber);
- registerPostProcess!"5"(&lexNumber);
- registerPostProcess!"6"(&lexNumber);
- registerPostProcess!"7"(&lexNumber);
- registerPostProcess!"8"(&lexNumber);
- registerPostProcess!"9"(&lexNumber);
- registerPostProcess!"#"(&lexNumber);
- registerPostProcess!"q\""(&lexDelimitedString);
- registerPostProcess!"q{"(&lexTokenString);
- registerPostProcess!"r\""(&lexWysiwygString);
- registerPostProcess!"x\""(&lexHexString);
- registerPostProcess!" "(&lexWhitespace);
- registerPostProcess!"\t"(&lexWhitespace);
- registerPostProcess!"\r"(&lexWhitespace);
- registerPostProcess!"\n"(&lexWhitespace);
- registerPostProcess!"\u2028"(&lexLongNewline);
- registerPostProcess!"\u2029"(&lexLongNewline);
- this.range = RangeType(range);
- popFront();
+ this.range = LexerRange!(typeof(buffer(range)))(buffer(range));
}
- bool isWhitespace() pure const nothrow
+ bool isWhitespace() pure /*const*/ nothrow
{
switch (range.front)
{
@@ -370,10 +375,10 @@ public struct DLexer(R)
case '\t':
return true;
case 0xe2:
- if (!range.canPeek(2))
- return false;
- return range.peek() == 0x80
- && (range.peek(2) == 0xa8 || range.peek(2) == 0xa9);
+ auto peek = range.lookahead(2);
+ return peek.length == 2
+ && peek[0] == 0x80
+ && (peek[1] == 0xa8 || peek[1] == 0xa9);
default:
return false;
}
@@ -398,8 +403,9 @@ public struct DLexer(R)
range.incrementLine();
return;
case 0xe2:
- if (range.canPeek(2) && range.peek() == 0x80
- && (range.peek(2) == 0xa8 || range.peek(2) == 0xa9))
+ auto lookahead = range.lookahead(3);
+ if (lookahead.length == 3 && lookahead[1] == 0x80
+ && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9))
{
range.popFront();
range.popFront();
@@ -420,7 +426,7 @@ public struct DLexer(R)
Token lexWhitespace() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
loop: do
{
switch (range.front)
@@ -440,11 +446,12 @@ public struct DLexer(R)
range.popFront();
break;
case 0xe2:
- if (!range.canPeek(2))
+ auto lookahead = range.lookahead(3);
+ if (lookahead.length != 3)
break loop;
- if (range.peek() != 0x80)
+ if (lookahead[1] != 0x80)
break loop;
- if (range.peek(2) == 0xa8 || range.peek(2) == 0xa9)
+ if (lookahead[2] == 0xa8 || lookahead[2] == 0xa9)
{
range.popFront();
range.popFront();
@@ -457,36 +464,43 @@ public struct DLexer(R)
break loop;
}
} while (!range.empty);
- return Token(tok!"whitespace", cast(string) range.getMarked(), range.line,
+ return Token(tok!"whitespace", cast(string) range.slice(mark), range.line,
range.column, range.index);
}
Token lexNumber() pure nothrow
{
- range.mark();
- if (range.front == '0')
+ auto mark = range.mark();
+ auto lookahead = range.lookahead(1);
+ if (range.front == '0' && lookahead.length == 1)
{
- switch (range.peek())
+ switch (lookahead[0])
{
case 'x':
case 'X':
range.popFront();
range.popFront();
- return lexHex();
+ return lexHex(mark);
case 'b':
case 'B':
range.popFront();
range.popFront();
- return lexBinary();
+ return lexBinary(mark);
default:
- return lexDecimal();
+ return lexDecimal(mark);
}
}
else
- return lexDecimal();
+ return lexDecimal(mark);
}
Token lexHex() pure nothrow
+ {
+ auto mark = range.mark();
+ return lexHex(mark);
+ }
+
+ Token lexHex(Mark mark) pure nothrow
{
IdType type = tok!"intLiteral";
bool foundDot;
@@ -526,7 +540,7 @@ public struct DLexer(R)
case '.':
if (foundDot)
break hexLoop;
- if (range.canPeek() && range.peek() == '.')
+ if (range.lookahead(1).length && range.lookahead(1)[0] == '.')
break hexLoop;
range.popFront();
foundDot = true;
@@ -536,11 +550,17 @@ public struct DLexer(R)
break hexLoop;
}
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
Token lexBinary() pure nothrow
+ {
+ auto mark = range.mark();
+ return lexBinary(mark);
+ }
+
+ Token lexBinary(Mark mark) pure nothrow
{
IdType type = tok!"intLiteral";
binaryLoop: while (!range.empty)
@@ -561,11 +581,11 @@ public struct DLexer(R)
break binaryLoop;
}
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
- Token lexDecimal() pure nothrow
+ Token lexDecimal(Mark mark) pure nothrow
{
bool foundDot = range.front == '.';
IdType type = tok!"intLiteral";
@@ -608,16 +628,17 @@ public struct DLexer(R)
case '.':
if (foundDot)
break decimalLoop;
- if (range.canPeek() && range.peek() == '.')
+ auto lookahead = range.lookahead(1);
+ if (lookahead.length == 1 && lookahead[0] == '.')
break decimalLoop;
else
{
// The following bit of silliness tries to tell the
// difference between "int dot identifier" and
// "double identifier".
- if (range.canPeek())
+ if (lookahead.length == 1)
{
- switch (range.peek())
+ switch (lookahead[0])
{
case '0': .. case '9':
goto doubleLiteral;
@@ -638,7 +659,7 @@ public struct DLexer(R)
break decimalLoop;
}
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
@@ -749,7 +770,7 @@ public struct DLexer(R)
Token lexSlashStarComment() pure
{
- range.mark();
+ auto mark = range.mark();
IdType type = tok!"comment";
range.popFront();
range.popFront();
@@ -767,13 +788,13 @@ public struct DLexer(R)
else
popFrontWhitespaceAware();
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
Token lexSlashSlashComment() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
IdType type = tok!"comment";
range.popFront();
range.popFront();
@@ -783,13 +804,13 @@ public struct DLexer(R)
break;
range.popFront();
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
Token lexSlashPlusComment() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
IdType type = tok!"comment";
range.popFront();
range.popFront();
@@ -817,13 +838,13 @@ public struct DLexer(R)
else
popFrontWhitespaceAware();
}
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
Token lexStringLiteral() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
range.popFront();
while (true)
{
@@ -846,13 +867,13 @@ public struct DLexer(R)
}
IdType type = tok!"stringLiteral";
lexStringSuffix(type);
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
Token lexWysiwygString() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
IdType type = tok!"stringLiteral";
bool backtick = range.front == '`';
if (backtick)
@@ -900,7 +921,7 @@ public struct DLexer(R)
}
}
lexStringSuffix(type);
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
@@ -922,7 +943,7 @@ public struct DLexer(R)
Token lexDelimitedString() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
range.popFront();
range.popFront();
ElementEncodingType!R open;
@@ -933,29 +954,29 @@ public struct DLexer(R)
open = '<';
close = '>';
range.popFront();
- return lexNormalDelimitedString(open, close);
+ return lexNormalDelimitedString(mark, open, close);
case '{':
open = '{';
close = '}';
range.popFront();
- return lexNormalDelimitedString(open, close);
+ return lexNormalDelimitedString(mark, open, close);
case '[':
open = '[';
close = ']';
range.popFront();
- return lexNormalDelimitedString(open, close);
+ return lexNormalDelimitedString(mark, open, close);
case '(':
open = '(';
close = ')';
range.popFront();
- return lexNormalDelimitedString(open, close);
+ return lexNormalDelimitedString(mark, open, close);
default:
return lexHeredocString();
}
}
- Token lexNormalDelimitedString(ElementEncodingType!RangeType open,
- ElementEncodingType!RangeType close) pure nothrow
+ Token lexNormalDelimitedString(Mark mark, ElementEncodingType!R open,
+ ElementEncodingType!R close) pure nothrow
{
int depth = 1;
while (!range.empty && depth > 0)
@@ -985,7 +1006,7 @@ public struct DLexer(R)
}
IdType type = tok!"stringLiteral";
lexStringSuffix(type);
- return Token(type, cast(string) range.getMarked(), range.line, range.column, range.index);
+ return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index);
}
Token lexHeredocString() pure nothrow
@@ -1024,7 +1045,7 @@ public struct DLexer(R)
Token lexHexString() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
range.popFront();
range.popFront();
@@ -1055,7 +1076,7 @@ public struct DLexer(R)
IdType type = tok!"stringLiteral";
lexStringSuffix(type);
- return Token(type, cast(string) range.getMarked(), range.line, range.column,
+ return Token(type, cast(string) range.slice(mark), range.line, range.column,
range.index);
}
@@ -1154,7 +1175,7 @@ public struct DLexer(R)
Token lexCharacterLiteral() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
range.popFront();
if (range.front == '\\')
{
@@ -1164,7 +1185,7 @@ public struct DLexer(R)
else if (range.front == '\'')
{
range.popFront();
- return Token(tok!"characterLiteral", cast(string) range.getMarked(),
+ return Token(tok!"characterLiteral", cast(string) range.slice(mark),
range.line, range.column, range.index);
}
else if (range.front & 0x80)
@@ -1182,7 +1203,7 @@ public struct DLexer(R)
if (range.front == '\'')
{
range.popFront();
- return Token(tok!"characterLiteral", cast(string) range.getMarked(),
+ return Token(tok!"characterLiteral", cast(string) range.slice(mark),
range.line, range.column, range.index);
}
else
@@ -1194,30 +1215,31 @@ public struct DLexer(R)
Token lexIdentifier() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
while (!range.empty && !isSeparating(range.front))
{
range.popFront();
}
- return Token(tok!"identifier", cast(string) range.getMarked(), range.index,
+ return Token(tok!"identifier", cast(string) range.slice(mark), range.index,
range.line, range.column);
}
Token lexDot() pure nothrow
{
- if (!range.canPeek)
+ auto lookahead = range.lookahead(1);
+ if (lookahead.length == 0)
{
range.popFront();
return Token(tok!".", null, range.line, range.column, range.index);
}
- switch (range.peek())
+ switch (lookahead[0])
{
case '0': .. case '9':
return lexNumber();
case '.':
range.popFront();
range.popFront();
- if (range.front == '.')
+ if (!range.empty && range.front == '.')
{
range.popFront();
return Token(tok!"...", null, range.line, range.column, range.index);
@@ -1232,16 +1254,21 @@ public struct DLexer(R)
Token lexLongNewline() pure nothrow
{
- range.mark();
+ auto mark = range.mark();
range.popFront();
range.popFront();
range.popFront();
range.incrementLine();
- return Token(tok!"whitespace", cast(string) range.getMarked(), range.line,
+ return Token(tok!"whitespace", cast(string) range.slice(mark), range.line,
range.column, range.index);
}
+
+ Token lexScriptLine() pure nothrow
+ {
+ assert(false, "Not implemented");
+ }
- bool isSeparating(C)(C c) nothrow pure @safe
+ bool isSeparating(ElementType!R c) nothrow pure @safe
{
if (c <= 0x2f) return true;
if (c >= ':' && c <= '@') return true;
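Aside (illustrative only, not part of this patch): the lexer hunks above replace the old canPeek()/peek() pair with a slice-returning lookahead(n); near the end of input the slice is simply shorter than n, so callers test .length instead of calling canPeek first. A self-contained sketch of that contract over a plain byte array (the names here are hypothetical, not the dpick buffer API):

    const(ubyte)[] lookahead(const(ubyte)[] data, size_t index, size_t n)
    {
        // Return up to n bytes starting at index; fewer if the input ends early.
        immutable end = index + n > data.length ? data.length : index + n;
        return data[index .. end];
    }

    // e.g. auto peek = lookahead(data, i, 2);
    //      if (peek.length == 2 && peek[0] == 0x80) { ... }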
diff --git a/stdx/d/parser.d b/stdx/d/parser.d
index 2519171..d80f361 100644
--- a/stdx/d/parser.d
+++ b/stdx/d/parser.d
@@ -1,62 +1,5 @@
// Written in the D programming language
-/**
- * This module contains a _parser for D source code.
- *
- * Grammar:
- * The grammar format used in the documentation of this module generally follows
- * the format used by the ANTLR _parser generator.
- * $(UL
- * $(LI Tokens and rules can be grouped by parenthesis.)
- * $(LI An asterisk (*) indicates that the previous rule, token, or group
- * can repeat 0 or more times.)
- * $(LI A question mark (?) indicates that the previous rule, token, or group
- * will be present either 0 or 1 times.)
- * $(LI A plus sign (+) indicates that the previous rule, token, or group
- * repeats one or more times. (i.e. it is optional))
- * $(LI If there is more than one way to match a rule, the alternatives will be
- * separated by a pipe character (|).)
- * $(LI Rule definitions begin with the rule name followed by a colon (:). Rule
- * definitions end with a semicolon (;).)
- * )
- *
- * The grammar for D starts with the $(LINK2 #module, module) rule.
- *
- * Examples:
- * ---
- * import std.d.lexer;
- * import std.d.parser;
- * import std.d.ast;
- * import std.array;
- *
- * string sourceCode = q{
- * import std.stdio;
- *
- * void main()
- * {
- * writeln("Hello, World.");
- * }
- * }c;
- * void main()
- * {
- * LexerConfig config;
- * auto tokens = byToken(cast(ubyte[]) sourceCode, config).array();
- * Module mod = parseModule(tokens);
- * // Use module here...
- * }
- * ---
- *
- * Copyright: Brian Schott 2013
- * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
- * Authors: Brian Schott
- * Source: $(PHOBOSSRC std/d/_parser.d)
- * Macros:
- * GRAMMAR = $0
- * RULEDEF = $0
- * RULE = $0
- * LITERAL = $0
- */
-
module stdx.d.parser;
import stdx.d.lexer;
@@ -2162,7 +2105,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
* Parses a ForStatement
*
* $(GRAMMAR $(RULEDEF forStatement):
- * $(LITERAL 'for') $(LITERAL '$(LPAREN)') $(RULE declarationOrStatement) $(RULE expression)? $(LITERAL ';') $(RULE expression)? $(LITERAL '$(RPAREN)') $(RULE statementNoCaseNoDefault)
+ * $(LITERAL 'for') $(LITERAL '$(LPAREN)') $(RULE declarationOrStatement) $(RULE expression)? $(LITERAL ';') $(RULE expression)? $(LITERAL '$(RPAREN)') $(RULE declarationOrStatement)
* ;)
*/
ForStatement parseForStatement()
@@ -2176,7 +2119,7 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
if (currentIs(tok!";"))
advance();
else
- node.declarationOrStatement = parseDeclarationOrStatement();
+ node.initialization = parseDeclarationOrStatement();
if (currentIs(tok!";"))
advance();
@@ -2192,8 +2135,8 @@ class ClassFour(A, B) if (someTest()) : Super {}}c;
error("Statement expected", false);
return node; // this line makes DCD better
}
- node.statementNoCaseNoDefault = parseStatementNoCaseNoDefault();
- if (node.statementNoCaseNoDefault is null) return null;
+ node.declarationOrStatement = parseDeclarationOrStatement();
+ if (node.declarationOrStatement is null) return null;
return node;
}
@@ -5917,7 +5860,7 @@ q{doStuff(5)}c;
error("Statement expected", false);
return node; // this line makes DCD better
}
- node.statementNoCaseNoDefault = parseStatementNoCaseNoDefault();
+ node.declarationOrStatement = parseDeclarationOrStatement();
return node;
}
diff --git a/stdx/lexer.d b/stdx/lexer.d
index da94701..980e530 100644
--- a/stdx/lexer.d
+++ b/stdx/lexer.d
@@ -10,12 +10,15 @@
*/
module stdx.lexer;
+
import std.typecons;
import std.algorithm;
import std.range;
import std.traits;
import std.conv;
import std.math;
+import dpick.buffer.buffer;
+import dpick.buffer.traits;
template TokenIdType(alias staticTokens, alias dynamicTokens,
alias possibleDefaultTokens)
@@ -34,12 +37,12 @@ string TokenStringRepresentation(IdType, alias staticTokens, alias dynamicTokens
{
if (type == 0)
return "!ERROR!";
- else if (type < staticTokens.length)
+ else if (type < staticTokens.length + 1)
return staticTokens[type - 1];
- else if (type < staticTokens.length + possibleDefaultTokens.length)
- return possibleDefaultTokens[type - staticTokens.length];
- else if (type < staticTokens.length + possibleDefaultTokens.length + dynamicTokens.length)
- return dynamicTokens[type - staticTokens.length - possibleDefaultTokens.length];
+ else if (type < staticTokens.length + possibleDefaultTokens.length + 1)
+ return possibleDefaultTokens[type - staticTokens.length - 1];
+ else if (type < staticTokens.length + possibleDefaultTokens.length + dynamicTokens.length + 1)
+ return dynamicTokens[type - staticTokens.length - possibleDefaultTokens.length - 1];
else
return null;
}
@@ -70,14 +73,16 @@ template TokenId(IdType, alias staticTokens, alias dynamicTokens,
enum ii = possibleDefaultTokens.countUntil(symbol);
static if (ii >= 0)
{
- enum id = ii + staticTokens.length;
+ enum id = ii + staticTokens.length + 1;
static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol);
alias id TokenId;
}
else
{
enum dynamicId = dynamicTokens.countUntil(symbol);
- enum id = dynamicId >= 0 ? i + staticTokens.length + possibleDefaultTokens.length + dynamicId : -1;
+ enum id = dynamicId >= 0
+ ? i + staticTokens.length + possibleDefaultTokens.length + dynamicId + 1
+ : -1;
static assert (id >= 0 && id < IdType.max, "Invalid token: " ~ symbol);
alias id TokenId;
}
@@ -113,13 +118,10 @@ struct TokenStructure(IDType)
IDType type;
}
-mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFunction,
+mixin template Lexer(R, IDType, Token, alias defaultTokenFunction,
alias staticTokens, alias dynamicTokens, alias pseudoTokens,
- alias possibleDefaultTokens) if (isForwardRange!R)
+ alias pseudoTokenHandlers, alias possibleDefaultTokens)
{
- enum size_t lookAhead = chain(staticTokens, pseudoTokens).map!"a.length".reduce!"max(a, b)"();
- alias PeekRange!(R, lookAhead) RangeType;
-
static string generateCaseStatements(string[] tokens, size_t offset = 0)
{
string code;
@@ -141,9 +143,9 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
code ~= generateLeaf(tokens[i], indent ~ " ");
else
{
- code ~= indent ~ " if (!range.canPeek(" ~ text(tokens[i].length - 1) ~ "))\n";
+ code ~= indent ~ " if (range.lookahead(" ~ text(tokens[i].length) ~ ").length == 0)\n";
code ~= indent ~ " goto outer_default;\n";
- code ~= indent ~ " if (range.startsWith(\"" ~ escape(tokens[i]) ~ "\"))\n";
+ code ~= indent ~ " if (range.lookahead(" ~ text(tokens[i].length) ~ ") == \"" ~ escape(tokens[i]) ~ "\")\n";
code ~= indent ~ " {\n";
code ~= generateLeaf(tokens[i], indent ~ " ");
code ~= indent ~ " }\n";
@@ -153,11 +155,11 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
}
else
{
- code ~= indent ~ " if (!range.canPeek(" ~ text(offset + 1) ~ "))\n";
+ code ~= indent ~ " if (range.lookahead(" ~ text(offset + 2) ~ ").length == 0)\n";
code ~= indent ~ " {\n";
code ~= generateLeaf(tokens[i][0 .. offset + 1], indent ~ " ");
code ~= indent ~ " }\n";
- code ~= indent ~ " switch (range.peek(" ~ text(offset + 1) ~ "))\n";
+ code ~= indent ~ " switch (range.lookahead(" ~ text(offset + 2) ~ ")[" ~ text(offset + 1) ~ "])\n";
code ~= indent ~ " {\n";
code ~= generateCaseStatements(tokens[i .. j], offset + 1);
code ~= indent ~ " default:\n";
@@ -172,6 +174,8 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
static string generateLeaf(string token, string indent)
{
+ static assert (pseudoTokenHandlers.length % 2 == 0,
+ "Each pseudo-token must have a matching function name.");
string code;
if (staticTokens.countUntil(token) >= 0)
{
@@ -179,13 +183,13 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
code ~= indent ~ "range.popFront();\n";
else
code ~= indent ~ "range.popFrontN(" ~ text(token.length) ~ ");\n";
- code ~= indent ~ "return Token(tok!\"" ~ escape(token) ~"\", null, range.line, range.column, range.index);\n";
+ code ~= indent ~ "return Token(tok!\"" ~ escape(token) ~ "\", null, range.line, range.column, range.index);\n";
}
else if (pseudoTokens.countUntil(token) >= 0)
- code ~= indent ~ "return postProcess(pseudoTok!\"" ~ escape(token) ~"\");\n";
+ code ~= indent ~ "return " ~ pseudoTokenHandlers[pseudoTokenHandlers.countUntil(token) + 1] ~ "();\n";
else if (possibleDefaultTokens.countUntil(token) >= 0)
{
- code ~= indent ~ "if (!range.canPeek(" ~ text(token.length) ~ ") || isSeparating(range.peek(" ~ text(token.length) ~ ")))\n";
+ code ~= indent ~ "if (range.lookahead(" ~ text(token.length + 1) ~ ").length == 0 || isSeparating(range.lookahead(" ~ text(token.length + 1) ~ ")[" ~ text(token.length) ~ "]))\n";
code ~= indent ~ "{\n";
if (token.length == 1)
code ~= indent ~ " range.popFront();\n";
@@ -211,18 +215,11 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
_front = advance();
}
- bool empty() const nothrow @property
+ bool empty() pure const nothrow @property
{
return _front.type == tok!"\0";
}
- template pseudoTok(string symbol)
- {
- static assert (pseudoTokens.countUntil(symbol) >= 0);
- enum index = cast(IDType) pseudoTokens.countUntil(symbol);
- alias index pseudoTok;
- }
-
static string escape(string input)
{
string rVal;
@@ -267,224 +264,36 @@ mixin template Lexer(R, IDType, Token, alias isSeparating, alias defaultTokenFun
return rVal;
}
- void registerPostProcess(alias t)(Token delegate() pure fun)
- {
- post[pseudoTok!t] = fun;
- }
-
- Token postProcess(IDType i) pure
- {
- assert (post[i] !is null, "No post-processing function registered for " ~ pseudoTokens[i]);
- return post[i]();
- }
-
- Token delegate() pure [pseudoTokens.length] post;
- RangeType range;
+ LexerRange!(typeof(buffer(R.init))) range;
Token _front;
}
-struct PeekRange(R, size_t peekSupported = 1) if (isRandomAccessRange!R
- && isForwardRange!R && hasSlicing!R)
+struct LexerRange(BufferType) if (isBuffer!BufferType)
{
-public:
-
- this(R range)
+ this(BufferType r)
{
- this.range = range;
+ this.range = r;
+ index = 0;
+ column = 1;
+ line = 1;
}
- invariant()
+ void popFront() pure
{
- import std.string;
- if (range.length != 6190)
- assert (false, format("range.length = %d %s", range.length, cast(char[]) range[0 .. 100]));
+ index++;
+ column++;
+ range.popFront();
}
- bool startsWith(string s)
- {
- return index + s.length < range.length
- && (cast(const(ubyte[])) s) == range[index .. index + s.length];
- }
-
- bool empty() pure nothrow const @property
- {
- return _index >= range.length;
- }
-
- const(ElementType!R) front() pure nothrow const @property
- in
- {
- assert (!empty);
- }
- body
- {
- return range[_index];
- }
-
- void popFront() pure nothrow
- {
- _index++;
- _column++;
- }
-
- void popFrontN(size_t n) pure nothrow
- {
- foreach (i; 0 .. n)
- popFront();
- }
-
- const(ElementType!R) peek(int offset = 1) pure nothrow const
- in
- {
- assert (canPeek(offset));
- }
- body
- {
- return range[_index + offset];
- }
-
- bool canPeek(size_t offset = 1) pure nothrow const
- {
- return _index + offset < range.length;
- }
-
- void mark() nothrow pure
- {
- markBegin = _index;
- }
-
- const(R) getMarked() pure nothrow const
- {
- return range[markBegin .. _index];
- }
-
void incrementLine() pure nothrow
{
- _column = 1;
- _line++;
+ column = 1;
+ line++;
}
-
- size_t line() pure nothrow const @property { return _line; }
- size_t column() pure nothrow const @property { return _column; }
- size_t index() pure nothrow const @property { return _index; }
-
-private:
- size_t markBegin;
- size_t _column = 1;
- size_t _line = 1;
- size_t _index = 0;
- R range;
+
+ BufferType range;
+ alias range this;
+ size_t index;
+ size_t column;
+ size_t line;
}
-
-//struct PeekRange(R, size_t peekSupported = 1)
-// if (!isRandomAccessRange!R && isForwardRange!R)
-//{
-//public:
-//
-// this(R range)
-// {
-// this.range = range;
-// for (size_t i = 0; !this.range.empty && i < peekSupported; i++)
-// {
-// rangeSizeCount++;
-// buffer[i] = this.range.front;
-// range.popFront();
-// }
-// }
-//
-// ElementType!R front() const @property
-// in
-// {
-// assert (!empty);
-// }
-// body
-// {
-// return buffer[bufferIndex];
-// }
-//
-// void popFront()
-// in
-// {
-// assert (!empty);
-// }
-// body
-// {
-// index++;
-// column++;
-// count++;
-// bufferIndex = bufferIndex + 1 > buffer.length ? 0 : bufferIndex + 1;
-// if (marking)
-// markBuffer.put(buffer[bufferIndex]);
-// if (!range.empty)
-// {
-// buffer[bufferIndex + peekSupported % buffer.length] = range.front();
-// range.popFront();
-// rangeSizeCount++;
-// }
-// }
-//
-// bool empty() const nothrow pure @property
-// {
-// return rangeSizeCount == count;
-// }
-//
-// ElementType!R peek(int offset = 1) pure nothrow const
-// in
-// {
-// assert (canPeek(offset));
-// }
-// body
-// {
-// return buffer[(bufferIndex + offset) % buffer.length];
-// }
-//
-// bool canPeek(size_t int offset = 1) pure nothrow const
-// {
-// return offset <= peekSupported && count + offset <= rangeSizeCount;
-// }
-//
-// typeof(this) save() @property
-// {
-// typeof(this) newRange;
-// newRange.count = count;
-// newRange.rangeSizeCount = count;
-// newRange.buffer = buffer.dup;
-// newRange.bufferIndex = bufferIndex;
-// newRange.range = range.save;
-// return newRange;
-// }
-//
-// void mark()
-// {
-// marking = true;
-// markBuffer.clear();
-// }
-//
-// ElementEncodingType!R[] getMarked()
-// {
-// marking = false;
-// return markBuffer.data;
-// }
-//
-// void incrementLine() pure nothrow
-// {
-// _column = 1;
-// _line++;
-// }
-//
-// size_t line() pure nothrow const @property { return _line; }
-// size_t column() pure nothrow const @property { return _column; }
-// size_t index() pure nothrow const @property { return _index; }
-//
-//private:
-// auto markBuffer = appender!(ElementType!R[])();
-// bool marking;
-// size_t count;
-// size_t rangeSizeCount;
-// ElementType!(R)[peekSupported + 1] buffer;
-// size_t bufferIndex;
-// size_t _column = 1;
-// size_t _line = 1;
-// size_t _index = 0;
-// R range;
-//}