Finished conversion to new lexer

This commit is contained in:
Hackerpilot 2014-01-12 02:45:37 +00:00
parent 844b626ed5
commit 070f9ac83b
6 changed files with 172 additions and 89 deletions

View File

@ -24,7 +24,7 @@ void printCtags(File output, string[] fileNames)
File f = File(fileName); File f = File(fileName);
auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size)); auto bytes = uninitializedArray!(ubyte[])(to!size_t(f.size));
f.rawRead(bytes); f.rawRead(bytes);
auto tokens = DLexer!(typeof(bytes))(bytes); auto tokens = byToken(bytes);
Module m = parseModule(tokens.array, fileName, &doNothing); Module m = parseModule(tokens.array, fileName, &doNothing);
auto printer = new CTagsPrinter; auto printer = new CTagsPrinter;
printer.fileName = fileName; printer.fileName = fileName;
@ -40,9 +40,6 @@ void printCtags(File output, string[] fileNames)
class CTagsPrinter : ASTVisitor class CTagsPrinter : ASTVisitor
{ {
alias ASTVisitor.visit visit;
override void visit(ClassDeclaration dec) override void visit(ClassDeclaration dec)
{ {
tagLines ~= "%s\t%s\t%d;\"\tc%s\n".format(dec.name.text, fileName, dec.name.line, context); tagLines ~= "%s\t%s\t%d;\"\tc%s\n".format(dec.name.text, fileName, dec.name.line, context);
@ -134,6 +131,8 @@ class CTagsPrinter : ASTVisitor
} }
dec.accept(this); dec.accept(this);
} }
alias ASTVisitor.visit visit;
string fileName; string fileName;
string[] tagLines; string[] tagLines;

@ -1 +1 @@
Subproject commit eb14a5244153c0e13ceca79f292838dfe2ac9bfb Subproject commit f63a843e9c0ce8db7fd897684fe323697255d87d

2
main.d
View File

@ -234,7 +234,7 @@ options:
Prints the number of logical lines of code in the given Prints the number of logical lines of code in the given
source files. If no files are specified, input is read from stdin. source files. If no files are specified, input is read from stdin.
--tokenCount | t [sourceFiles] --tokenCount | -t [sourceFiles]
Prints the number of tokens in the given source files. If no files are Prints the number of tokens in the given source files. If no files are
specified, input is read from stdin. specified, input is read from stdin.

View File

@ -943,7 +943,7 @@ public:
destructor, staticConstructor, staticDestructor, destructor, staticConstructor, staticDestructor,
sharedStaticDestructor, sharedStaticConstructor, sharedStaticDestructor, sharedStaticConstructor,
conditionalDeclaration, pragmaDeclaration, versionSpecification, conditionalDeclaration, pragmaDeclaration, versionSpecification,
declarations)); invariant_, postblit, declarations));
} }
/** */ Attribute[] attributes; /** */ Attribute[] attributes;

View File

@ -18,7 +18,7 @@ private enum staticTokens = [
private enum pseudoTokens = [ private enum pseudoTokens = [
"\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6", "\"", "`", "//", "/*", "/+", ".", "'", "0", "1", "2", "3", "4", "5", "6",
"7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!", "7", "8", "9", "q\"", "q{", "r\"", "x\"", " ", "\t", "\r", "\n", "#!",
"\u2028", "\u2029" "#line", "\u2028", "\u2029"
]; ];
private enum possibleDefaultTokens = [ private enum possibleDefaultTokens = [
@ -343,15 +343,15 @@ public struct DLexer(R)
".", "lexDot", ".", "lexDot",
"'", "lexCharacterLiteral", "'", "lexCharacterLiteral",
"0", "lexNumber", "0", "lexNumber",
"1", "lexNumber", "1", "lexDecimal",
"2", "lexNumber", "2", "lexDecimal",
"3", "lexNumber", "3", "lexDecimal",
"4", "lexNumber", "4", "lexDecimal",
"5", "lexNumber", "5", "lexDecimal",
"6", "lexNumber", "6", "lexDecimal",
"7", "lexNumber", "7", "lexDecimal",
"8", "lexNumber", "8", "lexDecimal",
"9", "lexNumber", "9", "lexDecimal",
"q\"", "lexDelimitedString", "q\"", "lexDelimitedString",
"q{", "lexTokenString", "q{", "lexTokenString",
"r\"", "lexWysiwygString", "r\"", "lexWysiwygString",
@ -362,7 +362,8 @@ public struct DLexer(R)
"\n", "lexWhitespace", "\n", "lexWhitespace",
"\u2028", "lexLongNewline", "\u2028", "lexLongNewline",
"\u2029", "lexLongNewline", "\u2029", "lexLongNewline",
"#!", "lexScriptLine" "#!", "lexScriptLine",
"#line", "lexSpecialTokenSequence"
]; ];
mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens, mixin Lexer!(R, IdType, Token, lexIdentifier, staticTokens,
@ -437,7 +438,7 @@ public struct DLexer(R)
Token lexWhitespace() pure nothrow Token lexWhitespace() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
loop: do loop: do
{ {
switch (range.front) switch (range.front)
@ -475,13 +476,13 @@ public struct DLexer(R)
break loop; break loop;
} }
} while (!range.empty); } while (!range.empty);
return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, return Token(tok!"whitespace", cast(string) range.slice(mark), line,
range.column, range.index); column, index);
} }
Token lexNumber() pure nothrow Token lexNumber() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
auto lookahead = range.lookahead(2); auto lookahead = range.lookahead(2);
if (range.front == '0' && lookahead.length == 2) if (range.front == '0' && lookahead.length == 2)
{ {
@ -491,27 +492,27 @@ public struct DLexer(R)
case 'X': case 'X':
range.popFront(); range.popFront();
range.popFront(); range.popFront();
return lexHex(mark); return lexHex(mark, line, column, index);
case 'b': case 'b':
case 'B': case 'B':
range.popFront(); range.popFront();
range.popFront(); range.popFront();
return lexBinary(mark); return lexBinary(mark, line, column, index);
default: default:
return lexDecimal(mark); return lexDecimal(mark, line, column, index);
} }
} }
else else
return lexDecimal(mark); return lexDecimal(mark, line, column, index);
} }
Token lexHex() pure nothrow Token lexHex() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
return lexHex(mark); return lexHex(mark, line, column, index);
} }
Token lexHex(Mark mark) pure nothrow Token lexHex(Mark mark, size_t line, size_t column, size_t index) pure nothrow
{ {
IdType type = tok!"intLiteral"; IdType type = tok!"intLiteral";
bool foundDot; bool foundDot;
@ -556,17 +557,17 @@ public struct DLexer(R)
break hexLoop; break hexLoop;
} }
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexBinary() pure nothrow Token lexBinary() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
return lexBinary(mark); return lexBinary(mark, line, column, index);
} }
Token lexBinary(Mark mark) pure nothrow Token lexBinary(Mark mark, size_t line, size_t column, size_t index) pure nothrow
{ {
IdType type = tok!"intLiteral"; IdType type = tok!"intLiteral";
binaryLoop: while (!range.empty) binaryLoop: while (!range.empty)
@ -587,11 +588,17 @@ public struct DLexer(R)
break binaryLoop; break binaryLoop;
} }
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexDecimal(Mark mark) pure nothrow Token lexDecimal()
{
mixin (tokenStart);
return lexDecimal(mark, line, column, index);
}
Token lexDecimal(Mark mark, size_t line, size_t column, size_t index) pure nothrow
{ {
bool foundDot = range.front == '.'; bool foundDot = range.front == '.';
IdType type = tok!"intLiteral"; IdType type = tok!"intLiteral";
@ -665,8 +672,8 @@ public struct DLexer(R)
break decimalLoop; break decimalLoop;
} }
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
void lexIntSuffix(ref IdType type) pure nothrow @safe void lexIntSuffix(ref IdType type) pure nothrow @safe
@ -768,15 +775,27 @@ public struct DLexer(R)
} }
} }
Token lexScriptLine() pure
Token lexSpecialTokenSequence() pure nothrow @safe
{ {
assert (false, "Not implemented"); mixin (tokenStart);
while (!range.empty && !isNewline)
range.popFront();
return Token(tok!"scriptLine", cast(string) range.slice(mark),
line, column, index);
}
Token lexSpecialTokenSequence() pure
{
mixin (tokenStart);
while (!range.empty && !isNewline)
range.popFront();
return Token(tok!"specialTokenSequence", cast(string) range.slice(mark),
line, column, index);
} }
Token lexSlashStarComment() pure Token lexSlashStarComment() pure
{ {
auto mark = range.mark(); mixin (tokenStart);
IdType type = tok!"comment"; IdType type = tok!"comment";
range.popFront(); range.popFront();
range.popFront(); range.popFront();
@ -794,13 +813,13 @@ public struct DLexer(R)
else else
popFrontWhitespaceAware(); popFrontWhitespaceAware();
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexSlashSlashComment() pure nothrow Token lexSlashSlashComment() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
IdType type = tok!"comment"; IdType type = tok!"comment";
range.popFront(); range.popFront();
range.popFront(); range.popFront();
@ -810,13 +829,13 @@ public struct DLexer(R)
break; break;
range.popFront(); range.popFront();
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexSlashPlusComment() pure nothrow Token lexSlashPlusComment() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
IdType type = tok!"comment"; IdType type = tok!"comment";
range.popFront(); range.popFront();
range.popFront(); range.popFront();
@ -844,13 +863,13 @@ public struct DLexer(R)
else else
popFrontWhitespaceAware(); popFrontWhitespaceAware();
} }
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexStringLiteral() pure nothrow Token lexStringLiteral() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
range.popFront(); range.popFront();
while (true) while (true)
{ {
@ -873,13 +892,13 @@ public struct DLexer(R)
} }
IdType type = tok!"stringLiteral"; IdType type = tok!"stringLiteral";
lexStringSuffix(type); lexStringSuffix(type);
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
Token lexWysiwygString() pure nothrow Token lexWysiwygString() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
IdType type = tok!"stringLiteral"; IdType type = tok!"stringLiteral";
bool backtick = range.front == '`'; bool backtick = range.front == '`';
if (backtick) if (backtick)
@ -927,8 +946,8 @@ public struct DLexer(R)
} }
} }
lexStringSuffix(type); lexStringSuffix(type);
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
void lexStringSuffix(ref IdType type) pure void lexStringSuffix(ref IdType type) pure
@ -950,7 +969,7 @@ public struct DLexer(R)
Token lexDelimitedString() pure nothrow Token lexDelimitedString() pure nothrow
{ {
import std.traits; import std.traits;
auto mark = range.mark(); mixin (tokenStart);
range.popFront(); range.popFront();
range.popFront(); range.popFront();
Unqual!(ElementEncodingType!R) open; Unqual!(ElementEncodingType!R) open;
@ -961,29 +980,30 @@ public struct DLexer(R)
open = '<'; open = '<';
close = '>'; close = '>';
range.popFront(); range.popFront();
return lexNormalDelimitedString(mark, open, close); return lexNormalDelimitedString(mark, line, column, index, open, close);
case '{': case '{':
open = '{'; open = '{';
close = '}'; close = '}';
range.popFront(); range.popFront();
return lexNormalDelimitedString(mark, open, close); return lexNormalDelimitedString(mark, line, column, index, open, close);
case '[': case '[':
open = '['; open = '[';
close = ']'; close = ']';
range.popFront(); range.popFront();
return lexNormalDelimitedString(mark, open, close); return lexNormalDelimitedString(mark, line, column, index, open, close);
case '(': case '(':
open = '('; open = '(';
close = ')'; close = ')';
range.popFront(); range.popFront();
return lexNormalDelimitedString(mark, open, close); return lexNormalDelimitedString(mark, line, column, index, open, close);
default: default:
return lexHeredocString(); return lexHeredocString();
} }
} }
Token lexNormalDelimitedString(Mark mark, ElementEncodingType!R open, Token lexNormalDelimitedString(Mark mark, size_t line, size_t column,
ElementEncodingType!R close) pure nothrow size_t index, ElementEncodingType!R open, ElementEncodingType!R close)
pure nothrow
{ {
int depth = 1; int depth = 1;
while (!range.empty && depth > 0) while (!range.empty && depth > 0)
@ -1013,7 +1033,7 @@ public struct DLexer(R)
} }
IdType type = tok!"stringLiteral"; IdType type = tok!"stringLiteral";
lexStringSuffix(type); lexStringSuffix(type);
return Token(type, cast(string) range.slice(mark), range.line, range.column, range.index); return Token(type, cast(string) range.slice(mark), line, column, index);
} }
Token lexHeredocString() pure nothrow Token lexHeredocString() pure nothrow
@ -1023,6 +1043,7 @@ public struct DLexer(R)
Token lexTokenString() pure Token lexTokenString() pure
{ {
mixin (tokenStart);
assert(range.front == 'q'); assert(range.front == 'q');
range.popFront(); range.popFront();
assert(range.front == '{'); assert(range.front == '{');
@ -1055,12 +1076,12 @@ public struct DLexer(R)
} }
IdType type = tok!"stringLiteral"; IdType type = tok!"stringLiteral";
lexStringSuffix(type); lexStringSuffix(type);
return Token(type, app.data, range.line, range.column, range.index); return Token(type, app.data, line, column, index);
} }
Token lexHexString() pure nothrow Token lexHexString() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
range.popFront(); range.popFront();
range.popFront(); range.popFront();
@ -1091,8 +1112,8 @@ public struct DLexer(R)
IdType type = tok!"stringLiteral"; IdType type = tok!"stringLiteral";
lexStringSuffix(type); lexStringSuffix(type);
return Token(type, cast(string) range.slice(mark), range.line, range.column, return Token(type, cast(string) range.slice(mark), line, column,
range.index); index);
} }
bool lexEscapeSequence() pure nothrow bool lexEscapeSequence() pure nothrow
@ -1190,7 +1211,7 @@ public struct DLexer(R)
Token lexCharacterLiteral() pure nothrow Token lexCharacterLiteral() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
range.popFront(); range.popFront();
if (range.front == '\\') if (range.front == '\\')
{ {
@ -1201,7 +1222,7 @@ public struct DLexer(R)
{ {
range.popFront(); range.popFront();
return Token(tok!"characterLiteral", cast(string) range.slice(mark), return Token(tok!"characterLiteral", cast(string) range.slice(mark),
range.line, range.column, range.index); line, column, index);
} }
else if (range.front & 0x80) else if (range.front & 0x80)
{ {
@ -1219,7 +1240,7 @@ public struct DLexer(R)
{ {
range.popFront(); range.popFront();
return Token(tok!"characterLiteral", cast(string) range.slice(mark), return Token(tok!"characterLiteral", cast(string) range.slice(mark),
range.line, range.column, range.index); line, column, index);
} }
else else
{ {
@ -1230,22 +1251,23 @@ public struct DLexer(R)
Token lexIdentifier() pure nothrow Token lexIdentifier() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
while (!range.empty && !isSeparating(range.front)) while (!range.empty && !isSeparating(range.front))
{ {
range.popFront(); range.popFront();
} }
return Token(tok!"identifier", cast(string) range.slice(mark), range.line, return Token(tok!"identifier", cast(string) range.slice(mark), line,
range.column, range.index); column, index);
} }
Token lexDot() pure nothrow Token lexDot() pure nothrow
{ {
mixin (tokenStart);
auto lookahead = range.lookahead(1); auto lookahead = range.lookahead(1);
if (lookahead.length == 0) if (lookahead.length == 0)
{ {
range.popFront(); range.popFront();
return Token(tok!".", null, range.line, range.column, range.index); return Token(tok!".", null, line, column, index);
} }
switch (lookahead[0]) switch (lookahead[0])
{ {
@ -1257,30 +1279,36 @@ public struct DLexer(R)
if (!range.empty && range.front == '.') if (!range.empty && range.front == '.')
{ {
range.popFront(); range.popFront();
return Token(tok!"...", null, range.line, range.column, range.index); return Token(tok!"...", null, line, column, index);
} }
else else
return Token(tok!"..", null, range.line, range.column, range.index); return Token(tok!"..", null, line, column, index);
default: default:
range.popFront(); range.popFront();
return Token(tok!".", null, range.line, range.column, range.index); return Token(tok!".", null, line, column, index);
} }
} }
Token lexLongNewline() pure nothrow Token lexLongNewline() pure nothrow
{ {
auto mark = range.mark(); mixin (tokenStart);
range.popFront(); range.popFront();
range.popFront(); range.popFront();
range.popFront(); range.popFront();
range.incrementLine(); range.incrementLine();
return Token(tok!"whitespace", cast(string) range.slice(mark), range.line, return Token(tok!"whitespace", cast(string) range.slice(mark), line,
range.column, range.index); column, index);
} }
Token lexScriptLine() pure nothrow bool isNewline() pure @safe
{ {
assert(false, "Not implemented"); if (range.front == '\n') return true;
if (range.front == '\r') return true;
auto lookahead = range.lookahead(3);
if (lookahead.length == 0) return false;
if (lookahead.startsWith("\u2028") || lookahead.startsWith("\u2029"))
return true;
return false;
} }
bool isSeparating(ElementType!R c) nothrow pure @safe bool isSeparating(ElementType!R c) nothrow pure @safe
@ -1290,10 +1318,23 @@ public struct DLexer(R)
if (c >= '[' && c <= '^') return true; if (c >= '[' && c <= '^') return true;
if (c >= '{' && c <= '~') return true; if (c >= '{' && c <= '~') return true;
if (c == '`') return true; if (c == '`') return true;
// if (c & 0x80 && (range.lookahead(3).startsWith("\u2028")
// || range.lookahead(3).startsWith("\u2029"))) return true;
return false; return false;
} }
/**
 * Source text for a string mixin, used as `mixin (tokenStart);` at the top of
 * the lex* functions of this lexer.
 *
 * It copies the range's current index, column and line into locals and takes
 * a mark, so a function can advance the range and still build its Token from
 * the values saved here. A function that mixes this in gets locals named
 * `index`, `column`, `line` and `mark`.
 */
enum tokenStart = q{
size_t index = range.index;
size_t column = range.column;
size_t line = range.line;
auto mark = range.mark();
};
void error(...) pure { void error(...) pure {
} }
/// Warning hook: accepts any arguments and does nothing with them.
void warning(...) pure
{
}
} }

View File

@ -96,7 +96,6 @@ class Parser
unittest unittest
{ {
stderr.writeln("Running unittest for parseAliasDeclaration.");
auto sourceCode = auto sourceCode =
q{ q{
alias core.sys.posix.stdio.fileno fileno; alias core.sys.posix.stdio.fileno fileno;
@ -128,6 +127,16 @@ alias core.sys.posix.stdio.fileno fileno;
node.type = parseType(); node.type = parseType();
return node; return node;
} }
unittest
{
    // Feeds `a = abcde!def` to parseAliasInitializer and checks that a node
    // comes back and that the parser recorded no errors.
    enum source = q{a = abcde!def};
    auto parser = getParserForUnittest(source, "parseAliasInitializer");
    auto parsed = parser.parseAliasInitializer();
    assert (parsed !is null);
    assert (parser.errorCount == 0);
    stderr.writeln("Unittest for parseAliasInitializer() passed.");
}
/** /**
* Parses an AliasThisDeclaration * Parses an AliasThisDeclaration
@ -147,6 +156,16 @@ alias core.sys.posix.stdio.fileno fileno;
if (expect(tok!";") is null) return null; if (expect(tok!";") is null) return null;
return node; return node;
} }
unittest
{
    // Feeds a complete `alias ... this;` declaration to
    // parseAliasThisDeclaration and checks that a node comes back and that
    // the parser recorded no errors.
    enum source = q{alias oneTwoThree this;};
    auto parser = getParserForUnittest(source, "parseAliasThisDeclaration");
    auto declaration = parser.parseAliasThisDeclaration();
    assert (declaration !is null);
    assert (parser.errorCount == 0);
    stderr.writeln("Unittest for parseAliasThisDeclaration() passed.");
}
/** /**
* Parses an AlignAttribute. * Parses an AlignAttribute.
@ -169,6 +188,18 @@ alias core.sys.posix.stdio.fileno fileno;
} }
return node; return node;
} }
unittest
{
    // The input holds two attributes back to back: `align(42)` and a bare
    // `align`. parseAlignAttribute is called once for each, and both calls
    // must return a node without the parser recording an error.
    enum source = q{align(42) align};
    auto parser = getParserForUnittest(source, "parseAlignAttribute");
    foreach (_; 0 .. 2)
    {
        auto parsed = parser.parseAlignAttribute();
        assert (parsed !is null);
    }
    assert (parser.errorCount == 0);
    stderr.writeln("Unittest for parseAlignAttribute() passed.");
}
/** /**
* Parses an AndAndExpression * Parses an AndAndExpression
@ -3098,6 +3129,16 @@ invariant() foo();
if (expect(tok!")") is null) return null; if (expect(tok!")") is null) return null;
return node; return node;
} }
unittest
{
    // Feeds an `is ( identifier : type )` expression to parseIsExpression and
    // checks that a node comes back and that the parser recorded no errors.
    // NOTE(review): `uybte` looks like a typo for `ubyte`; left untouched
    // because changing it would alter what the test parses - confirm intent.
    enum source = q{is ( x : uybte)}c;
    auto parser = getParserForUnittest(source, "parseIsExpression");
    auto parsed = parser.parseIsExpression();
    assert (parsed !is null);
    assert (parser.errorCount == 0);
    stderr.writeln("Unittest for parseIsExpression passed.");
}
/** /**
* Parses a KeyValuePair * Parses a KeyValuePair
@ -3369,6 +3410,10 @@ invariant() foo();
node.symbol = parseSymbol(); node.symbol = parseSymbol();
return node; return node;
} }
unittest
{
    // NOTE(review): empty placeholder - this block contains no statements
    // and therefore asserts nothing yet.
}
/** /**
* Parses a Module * Parses a Module
@ -5951,8 +5996,6 @@ protected:
return hasMagicDelimiter!(tok!":")(); return hasMagicDelimiter!(tok!":")();
} }
bool hasMagicDelimiter(alias T)() bool hasMagicDelimiter(alias T)()
{ {
mixin(traceEnterAndExit!(__FUNCTION__)); mixin(traceEnterAndExit!(__FUNCTION__));
@ -6432,14 +6475,14 @@ protected:
} }
version (unittest) static void doNothingErrorFunction(string fileName, version (unittest) static void doNothingErrorFunction(string fileName,
int line, int column, string message) {} size_t line, size_t column, string message) {}
version (unittest) static Parser getParserForUnittest(string sourceCode, version (unittest) static Parser getParserForUnittest(string sourceCode,
string testName) string testName)
{ {
auto r = byToken(cast(ubyte[]) sourceCode); auto r = byToken(cast(ubyte[]) sourceCode);
Parser p = new Parser; Parser p = new Parser;
//p.messageFunction = &doNothingErrorFunction; p.messageFunction = &doNothingErrorFunction;
p.fileName = testName ~ ".d"; p.fileName = testName ~ ".d";
p.tokens = r.array(); p.tokens = r.array();
return p; return p;