Fix UB in symlist: the GC freed some strings held in the unmanaged symbol tree

This commit is contained in:
Basile Burg 2016-03-12 04:19:21 +01:00
parent 0d7e34c1f1
commit 6d60a3118c
1 changed file with 213 additions and 188 deletions

View File

@ -2,23 +2,23 @@
Usage Usage
===== =====
- In Coedit: - In Coedit:
the program must be located somewhere in the PATH. the program must be located somewhere in the PATH.
- Elsewhere: - Elsewhere:
invoke with `[-j] [<filename>]`. invoke with `[-j] [<filename>]`.
- `-j`: optional, if set then the program outputs the list (in stdout) in JSON - `-j`: optional, if set then the program outputs the list (in stdout) in JSON
otherwise in Pascal streaming text format. otherwise in Pascal streaming text format.
- `<filename>`: optional, the D module filename, if not set then the program - `<filename>`: optional, the D module filename, if not set then the program
reads the module from stdin. reads the module from stdin.
- see the source for more information about how to use the output. - see the source for more information about how to use the output.
It's basically a tree of struct with 3 members: symbol type, name and location. It's basically a tree of struct with 3 members: symbol type, name and location
- Test in CE as a runnable module: - Test in CE as a runnable module:
click `Compile file and run ...` and type either `<CFF>` or `-j <CFF>` in the click `Compile file and run ...` and type either `<CFF>` or `-j <CFF>` in the
input query dialog. Note that this will only work if libdparse is set up in the input query dialog. Note that this will only work if libdparse is set up in the
library manager. library manager.
*/ */
module cesyms; module cesyms;
@ -37,115 +37,138 @@ void main(string[] args)
{ {
// format // format
bool asJson; bool asJson;
getopt(args, std.getopt.config.passThrough,'j', &asJson); getopt(args, std.getopt.config.passThrough, 'j', &asJson);
// get either the module from stdin or from first arg // get either the module from stdin or from first arg
string fname; string fname;
ubyte[] source; ubyte[] source;
if (args.length == 1) if (args.length == 1)
{ {
version(runnable_module) version (runnable_module)
{ {
source = cast(ubyte[]) read(__FILE__, size_t.max); source = cast(ubyte[]) read(__FILE__, size_t.max);
} }
else foreach(buff; stdin.byChunk(1024)) else
source ~= buff; foreach (buff; stdin.byChunk(1024))
source ~= buff;
} }
else if (args.length == 2) else if (args.length == 2)
{ {
fname = args[$-1]; fname = args[$ - 1];
if (!fname.exists) return; if (!fname.exists)
return;
source = cast(ubyte[]) read(fname, size_t.max); source = cast(ubyte[]) read(fname, size_t.max);
} }
else return; else
return;
// load and parse the file // load and parse the file
auto config = LexerConfig(fname, StringBehavior.source, WhitespaceBehavior.skip); auto config = LexerConfig(fname, StringBehavior.source, WhitespaceBehavior.skip);
auto scache = StringCache(StringCache.defaultBucketCount); auto scache = StringCache(StringCache.defaultBucketCount);
auto ast = parseModule(getTokensForParser(source, config, &scache), fname, null, &(SymbolListBuilder.astError)); auto ast = parseModule(getTokensForParser(source, config, &scache), fname,
null, &(SymbolListBuilder.astError));
// visit each root member // visit each root member
SymbolListBuilder slb = construct!SymbolListBuilder; SymbolListBuilder slb = construct!SymbolListBuilder;
foreach(Declaration decl; ast.declarations) foreach (Declaration decl; ast.declarations)
{ {
slb.resetRoot; slb.resetRoot;
slb.visit(decl); slb.visit(decl);
} }
version(none) version (none)
{ {
int level = -1; int level = -1;
void print(Symbol * s) void print(Symbol* s)
{ {
foreach(i; 0 .. level) write("."); foreach (i; 0 .. level)
write(".");
level++; level++;
write(s.name, '\r'); write(s.name, '\r');
foreach(ss; s.subs) foreach (ss; s.subs)
print(ss); print(ss);
level--; level--;
} }
print(slb.root); print(slb.root);
} }
else else
{ {
if (asJson) write(slb.serializeJson); if (asJson)
else write(slb.serializePascal); write(slb.serializeJson);
} else
write(slb.serializePascal);
}
slb.destruct; slb.destruct;
} }
// libdparse warnings include some "'", which are string delimiters in Pascal // libdparse warnings include some "'", which are string delimiters in Pascal
string patchPasStringLitteral(const ref string p) string patchPasStringLitteral(string p)
{ {
import std.range : empty, front, popFront;
string result; string result;
for (auto i = 0; i < p.length; i++) while (!p.empty)
{ {
auto curr = p[i]; dchar curr = p.front;
if (curr == 0) switch (curr)
{
default:
result ~= curr;
break; break;
else if (curr == 13 || curr == 10) case 10, 13:
result ~= ' '; result ~= ' ';
else if (curr == '\'') break;
case '\'':
result ~= "'#39'"; result ~= "'#39'";
else }
result ~= curr; p.popFront;
} }
return result; return result;
} }
// Memory utils ---------------------------------------------------------------+ // Memory utils ---------------------------------------------------------------+
void * getMem(size_t size) nothrow void* getMem(size_t size) nothrow
{ {
import std.c.stdlib; import std.c.stdlib;
auto result = malloc(size); auto result = malloc(size);
assert(result, "Out of memory"); assert(result, "Out of memory");
return result; return result;
} }
CT construct(CT, A...)(A a) CT construct(CT, A...)(A a) if (is(CT == class) && !isAbstractClass!CT)
if (is(CT == class))
{ {
import std.conv : emplace; auto size = typeid(CT).init.length;
auto size = __traits(classInstanceSize, CT); auto memory = getMem(size);
auto memory = getMem(size)[0 .. size]; memory[0 .. size] = typeid(CT).init[];
return emplace!(CT, A)(memory, a); static if (__traits(hasMember, CT, "__ctor"))
(cast(CT)(memory)).__ctor(a);
import core.memory : GC;
GC.addRange(memory, size, typeid(CT));
return cast(CT) memory;
} }
ST * construct(ST, A...)(A a) ST* construct(ST, A...)(A a) if (is(ST == struct) || is(ST == union))
if(is(ST==struct))
{ {
import std.conv : emplace; import std.conv : emplace;
auto size = ST.sizeof; auto size = ST.sizeof;
auto memory = getMem(size)[0 .. size]; auto memory = getMem(size)[0 .. size];
import core.memory : GC;
GC.addRange(memory.ptr, size, typeid(ST));
return emplace!(ST, A)(memory, a); return emplace!(ST, A)(memory, a);
} }
void destruct(T)(ref T instance) void destruct(T)(ref T instance)
if (is(T == class) || (isPointer!T && is(PointerTarget!T == struct))) if (is(T == class) || (isPointer!T && is(PointerTarget!T == struct)))
{ {
if (!instance) return; if (!instance)
return;
destroy(instance); destroy(instance);
instance = null; instance = null;
} }
@ -155,14 +178,14 @@ if (is(T == class) || (isPointer!T && is(PointerTarget!T == struct)))
enum SymbolType enum SymbolType
{ {
_alias, _alias,
_class, _class,
_enum, _enum,
_error, _error,
_function, _function,
_interface, _interface,
_import, _import,
_mixin, // (template decl) _mixin, // (template decl)
_struct, _struct,
_template, _template,
_union, _union,
_variable, _variable,
@ -174,51 +197,53 @@ struct Symbol
size_t line; size_t line;
size_t col; size_t col;
string name; string name;
SymbolType type; SymbolType type;
Symbol * [] subs; Symbol*[] subs;
~this() ~this()
{ {
foreach_reverse(i; 0..subs.length) foreach_reverse (i; 0 .. subs.length)
subs[i].destruct; subs[i].destruct;
} }
void serialize(List)(auto ref List list) void serialize(List)(auto ref List list)
{ {
static if (is(List == Appender!string)) static if (is(List == Appender!string))
serializePascal(list); serializePascal(list);
else static if (is(List == JSONValue)) else static if (is(List == JSONValue))
serializeJson(list); serializeJson(list);
else static assert(0, "serialization kind cannot be deduced from list"); else
static assert(0, "serialization kind cannot be deduced from list");
} }
void serializePascal(ref Appender!string lfmApp) void serializePascal(ref Appender!string lfmApp)
{ {
lfmApp.put("\ritem\r"); lfmApp.put("\ritem\r");
lfmApp.put(format("line = %d\r", line)); lfmApp.put(format("line = %d\r", line));
lfmApp.put(format("col = %d\r", col)); lfmApp.put(format("col = %d\r", col));
lfmApp.put(format("name = '%s'\r", name)); lfmApp.put(format("name = '%s'\r", name));
lfmApp.put(format("symType = %s\r", type)); lfmApp.put(format("symType = %s\r", type));
lfmApp.put("subs = <"); lfmApp.put("subs = <");
if (subs.length) foreach(Symbol * sub; subs) if (subs.length)
sub.serialize(lfmApp); foreach (Symbol* sub; subs)
sub.serialize(lfmApp);
lfmApp.put(">\r"); lfmApp.put(">\r");
lfmApp.put("end"); lfmApp.put("end");
} }
void serializeJson(ref JSONValue json) void serializeJson(ref JSONValue json)
{ {
auto vobj = parseJSON("{}"); auto vobj = parseJSON("{}");
vobj["line"]= JSONValue(line); vobj["line"] = JSONValue(line);
vobj["col"] = JSONValue(col); vobj["col"] = JSONValue(col);
vobj["name"]= JSONValue(name); vobj["name"] = JSONValue(name);
vobj["type"]= JSONValue(to!string(type)); vobj["type"] = JSONValue(to!string(type));
if (subs.length) if (subs.length)
{ {
auto vsubs = parseJSON("[]"); auto vsubs = parseJSON("[]");
foreach(Symbol * sub; subs) foreach (Symbol* sub; subs)
sub.serializeJson(vsubs); sub.serializeJson(vsubs);
vobj["items"] = vsubs; vobj["items"] = vsubs;
} }
@ -230,90 +255,89 @@ struct Symbol
// AST visitor/Symbol list ----------------------------------------------------+ // AST visitor/Symbol list ----------------------------------------------------+
class SymbolListBuilder : ASTVisitor class SymbolListBuilder : ASTVisitor
{ {
Symbol * root; Symbol* root;
Symbol * parent; Symbol* parent;
// for some reason (?) the .name of a (static Symbol* []) item was lost // for some reason (?) the .name of a (static Symbol* []) item was lost
__gshared static Symbol[] illFormed; __gshared static Symbol[] illFormed;
size_t count; size_t count;
alias visit = ASTVisitor.visit; alias visit = ASTVisitor.visit;
this() this()
{ {
root = construct!Symbol; root = construct!Symbol;
resetRoot; resetRoot;
} }
~this() ~this()
{ {
root.destruct; root.destruct;
} }
static void astError(string fname, size_t line, size_t col, string msg, bool isErr) static void astError(string fname, size_t line, size_t col, string msg, bool isErr)
{ {
Symbol * newSym = construct!Symbol; Symbol* newSym = construct!Symbol;
newSym.col = col; newSym.col = col;
newSym.line = line; newSym.line = line;
newSym.name = patchPasStringLitteral(msg); newSym.name = patchPasStringLitteral(msg);
newSym.type = isErr ? SymbolType._error : SymbolType._warning; newSym.type = isErr ? SymbolType._error : SymbolType._warning;
illFormed ~= * newSym; illFormed ~= *newSym;
} }
final void resetRoot(){parent = root;} final void resetRoot()
{
parent = root;
}
final string serializePascal() final string serializePascal()
{ {
Appender!string lfmApp; Appender!string lfmApp;
lfmApp.reserve(count * 64); lfmApp.reserve(count * 64);
lfmApp.put("object TSymbolList\rsymbols = <"); lfmApp.put("object TSymbolList\rsymbols = <");
foreach(sym; illFormed) sym.serialize(lfmApp); foreach (sym; illFormed)
foreach(sym; root.subs) sym.serialize(lfmApp); sym.serialize(lfmApp);
foreach (sym; root.subs)
sym.serialize(lfmApp);
lfmApp.put(">\rend\r\n"); lfmApp.put(">\rend\r\n");
return lfmApp.data; return lfmApp.data;
} }
final string serializeJson() final string serializeJson()
{ {
JSONValue result = parseJSON("{}"); JSONValue result = parseJSON("{}");
JSONValue vsubs = parseJSON("[]"); JSONValue vsubs = parseJSON("[]");
foreach(sym; illFormed) sym.serialize(vsubs); foreach (sym; illFormed)
foreach(sym; root.subs) sym.serialize(vsubs); sym.serialize(vsubs);
foreach (sym; root.subs)
sym.serialize(vsubs);
result["items"] = vsubs; result["items"] = vsubs;
version(assert) return result.toPrettyString; version (assert)
return result.toPrettyString;
// else: release mode // else: release mode
else return result.toString; else
} return result.toString;
/// returns a new symbol if the declarator is based on a Token named "name".
final Symbol * addDeclaration(DT)(DT adt)
{
static if (__traits(hasMember, DT, "name"))
{
count++;
auto result = construct!Symbol;
result.name = adt.name.text;
result.line = adt.name.line;
result.col = adt.name.column;
parent.subs ~= result;
return result;
}
else static assert(0, "addDeclaration no implemented for " ~ DT.stringof);
} }
/// visitor implementation if the declarator is based on a Token named "name". /// visitor implementation if the declaration has a "name".
final void namedVisitorImpl(DT, SymbolType st, bool dig = true)(const(DT) dt) final void namedVisitorImpl(DT, SymbolType st, bool dig = true)(const(DT) dt)
if (__traits(hasMember, DT, "name"))
{ {
auto newSymbol = addDeclaration(dt); ++count;
Symbol* newSymbol = construct!Symbol;
newSymbol.name = dt.name.text;
newSymbol.line = dt.name.line;
newSymbol.col = dt.name.column;
newSymbol.type = st; newSymbol.type = st;
// parent.subs ~= newSymbol;
static if (dig) static if (dig)
{ {
auto previousParent = parent; auto previousParent = parent;
scope(exit) parent = previousParent; scope (exit)
parent = previousParent;
parent = newSymbol; parent = newSymbol;
dt.accept(this); dt.accept(this);
} }
@ -323,19 +347,18 @@ class SymbolListBuilder : ASTVisitor
final void otherVisitorImpl(SymbolType st, string name, size_t line, size_t col) final void otherVisitorImpl(SymbolType st, string name, size_t line, size_t col)
{ {
count++; count++;
auto result = construct!Symbol; Symbol* result = construct!Symbol;
result.name = name; result.name = name.idup;
result.line = line; result.line = line;
result.col = col; result.col = col;
result.type = st; result.type = st;
parent.subs ~= result; parent.subs ~= result;
} }
final override void visit(const AliasDeclaration decl) final override void visit(const AliasDeclaration decl)
{ {
// why is initializers an array ? if (decl.initializers.length)
if (decl.initializers.length > 0) namedVisitorImpl!(AliasInitializer, SymbolType._alias)(decl.initializers[0]);
namedVisitorImpl!(AliasInitializer, SymbolType._alias)(decl.initializers[0]);
} }
final override void visit(const AnonymousEnumMember decl) final override void visit(const AnonymousEnumMember decl)
@ -350,103 +373,105 @@ class SymbolListBuilder : ASTVisitor
final override void visit(const AutoDeclaration decl) final override void visit(const AutoDeclaration decl)
{ {
otherVisitorImpl(SymbolType._enum, decl.identifiers[0].text, if (decl.identifiers.length)
decl.identifiers[0].line, decl.identifiers[0].column); {
decl.accept(this); otherVisitorImpl(SymbolType._variable, decl.identifiers[0].text,
decl.identifiers[0].line, decl.identifiers[0].column);
}
} }
final override void visit(const ClassDeclaration decl) final override void visit(const ClassDeclaration decl)
{ {
namedVisitorImpl!(ClassDeclaration, SymbolType._class)(decl); namedVisitorImpl!(ClassDeclaration, SymbolType._class)(decl);
} }
final override void visit(const Constructor decl) final override void visit(const Constructor decl)
{ {
otherVisitorImpl(SymbolType._function, "this", decl.line, decl.column); otherVisitorImpl(SymbolType._function, "this", decl.line, decl.column);
} }
final override void visit(const Destructor decl) final override void visit(const Destructor decl)
{ {
otherVisitorImpl(SymbolType._function, "~this", decl.line, decl.column); otherVisitorImpl(SymbolType._function, "~this", decl.line, decl.column);
} }
final override void visit(const EnumDeclaration decl) final override void visit(const EnumDeclaration decl)
{ {
namedVisitorImpl!(EnumDeclaration, SymbolType._enum)(decl); namedVisitorImpl!(EnumDeclaration, SymbolType._enum)(decl);
} }
final override void visit(const EponymousTemplateDeclaration decl) final override void visit(const EponymousTemplateDeclaration decl)
{ {
namedVisitorImpl!(EponymousTemplateDeclaration, SymbolType._template)(decl); namedVisitorImpl!(EponymousTemplateDeclaration, SymbolType._template)(decl);
} }
final override void visit(const FunctionDeclaration decl) final override void visit(const FunctionDeclaration decl)
{ {
namedVisitorImpl!(FunctionDeclaration, SymbolType._function)(decl); namedVisitorImpl!(FunctionDeclaration, SymbolType._function)(decl);
} }
final override void visit(const InterfaceDeclaration decl) final override void visit(const InterfaceDeclaration decl)
{ {
namedVisitorImpl!(InterfaceDeclaration, SymbolType._interface)(decl); namedVisitorImpl!(InterfaceDeclaration, SymbolType._interface)(decl);
} }
final override void visit(const ImportDeclaration decl) final override void visit(const ImportDeclaration decl)
{ {
foreach(const(SingleImport) si; decl.singleImports) foreach (const(SingleImport) si; decl.singleImports)
{ {
if (!si.identifierChain.identifiers.length) if (!si.identifierChain.identifiers.length)
continue; continue;
//
string[] modules; string[] modules;
foreach(ident; si.identifierChain.identifiers) foreach (ident; si.identifierChain.identifiers)
{ {
modules ~= ident.text; modules ~= ident.text;
modules ~= "."; modules ~= ".";
} }
// otherVisitorImpl(SymbolType._import, modules[0 .. $ - 1].join,
otherVisitorImpl(SymbolType._import, modules[0..$-1].join,
si.identifierChain.identifiers[0].line, si.identifierChain.identifiers[0].line,
si.identifierChain.identifiers[0].column si.identifierChain.identifiers[0].column);
); }
}
} }
final override void visit(const MixinTemplateDeclaration decl) final override void visit(const MixinTemplateDeclaration decl)
{ {
namedVisitorImpl!(TemplateDeclaration, SymbolType._mixin)(decl.templateDeclaration); namedVisitorImpl!(TemplateDeclaration, SymbolType._mixin)(decl.templateDeclaration);
}
final override void visit(const StructDeclaration decl)
{
namedVisitorImpl!(StructDeclaration, SymbolType._struct)(decl);
} }
final override void visit(const TemplateDeclaration decl) final override void visit(const StructDeclaration decl)
{ {
namedVisitorImpl!(TemplateDeclaration, SymbolType._template)(decl); namedVisitorImpl!(StructDeclaration, SymbolType._struct)(decl);
} }
final override void visit(const UnionDeclaration decl) final override void visit(const TemplateDeclaration decl)
{ {
namedVisitorImpl!(UnionDeclaration, SymbolType._union)(decl); namedVisitorImpl!(TemplateDeclaration, SymbolType._template)(decl);
} }
final override void visit(const VariableDeclaration decl) final override void visit(const UnionDeclaration decl)
{ {
foreach(elem; decl.declarators) namedVisitorImpl!(UnionDeclaration, SymbolType._union)(decl);
namedVisitorImpl!(Declarator, SymbolType._variable, false)(elem); }
if (decl.autoDeclaration)
final override void visit(const VariableDeclaration decl)
{
if (decl.declarators)
foreach (elem; decl.declarators)
namedVisitorImpl!(Declarator, SymbolType._variable, false)(elem);
else if (decl.autoDeclaration)
visit(decl.autoDeclaration); visit(decl.autoDeclaration);
} }
final override void visit(const StaticConstructor decl) final override void visit(const StaticConstructor decl)
{ {
otherVisitorImpl(SymbolType._function, "static this", decl.line, decl.column); otherVisitorImpl(SymbolType._function, "static this", decl.line, decl.column);
} }
final override void visit(const StaticDestructor decl) final override void visit(const StaticDestructor decl)
{ {
otherVisitorImpl(SymbolType._function, "static ~this", decl.line, decl.column); otherVisitorImpl(SymbolType._function, "static ~this", decl.line, decl.column);
} }
} }
//---- //----