module arsd.dom;

// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();

// FIXME: should I keep processing instructions (and comments too)? I *want* them
// stripped out of most of my output, but I want to be able to parse and create them too.
// Stripping them is useful for reading php as html.... but adding them
// is good for building php.
// I need to maintain compatibility with the way it is now too.

import arsd.characterencodings;
import std.string;
import std.exception;
import std.uri;
import std.array;
import std.stdio;

// Tag soup works for most of the markup I know of now. If you have two bad closing
// tags back to back, it might erase one, but that's rarer than the flipped closing
// tags that hack fixes, so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo.)

/// This might belong in another module, but it represents a file with a mime type and some data.
/// Document implements this interface with type = text/html (see Document.contentType for more info)
/// and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
interface FileResource {
    /// The MIME type describing the bytes returned by getData.
    string contentType() const;
    /// The raw content of the resource.
    immutable(ubyte)[] getData() const;
}

// This puts in operators and opDispatch to handle string indexes and properties,
// forwarding to get and set functions. The type mixing this in must therefore
// provide `string get(string)` and `string set(string, string)` itself.
mixin template JavascriptStyleDispatch() {
    // obj.name reads through get(); obj.name = v (or obj.name(v)) writes through set().
    // A null argument is treated as "no argument", so null can never be stored this way.
    string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want.
        if(v !is null)
            return set(name, v);
        return get(name);
    }

    // obj["key"] forwards to get(key).
    string opIndex(string key) const {
        return get(key);
    }

    // obj["field"] = value forwards to set(field, value).
    string opIndexAssign(string value, string field) {
        return set(field, value);
    }

    // FIXME: doesn't seem to work
    // NOTE(review): this body reads `fields`, which neither struct in this part of the
    // file declares, so instantiating it there would not compile. D also resolves
    // `key in obj` through opBinaryRight rather than opBinary - confirm before relying on it.
    string* opBinary(string op)(string key) if(op == "in") {
        return key in fields;
    }
}

/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
///
/// Do not create this object directly.
struct DataSet {
    private Element _element;

    /// Wraps the element whose "data-" attributes this proxy reads and writes.
    this(Element e) {
        this._element = e;
    }

    /// Writes `value` to the attribute "data-" ~ unCamelCase(name) and hands `value` back.
    string set(string name, string value) {
        auto attributeName = "data-" ~ unCamelCase(name);
        _element.setAttribute(attributeName, value);
        return value;
    }

    /// Reads the attribute "data-" ~ unCamelCase(name) via Element.getAttribute.
    string get(string name) const {
        auto attributeName = "data-" ~ unCamelCase(name);
        return _element.getAttribute(attributeName);
    }

    mixin JavascriptStyleDispatch!();
}

/// for style, i want to be able to set it with a string like a plain attribute,
/// but also be able to do properties Javascript style.
struct ElementStyle {
    Element _element;

    /// Wraps the element whose style attribute this proxy edits.
    this(Element parent) {
        _element = parent;
    }

    /// A reference to the element's raw "style" attribute string.
    /// When the attribute is missing an empty one is created first (even through
    /// a const view) so that there is always a string to refer to.
    @property ref inout(string) _attribute() inout {
        auto slot = "style" in _element.attributes;
        if(slot is null) {
            auto writable = cast() _element; // const_cast
            writable.attributes["style"] = ""; // we need something to reference
            slot = cast(inout) ("style" in writable.attributes);
        }
        assert(slot !is null);
        return *slot;
    }

    alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

    /// Sets one rule and rewrites the whole attribute as "name: value;" items
    /// joined by single spaces. An empty name is ignored. Returns `value`.
    string set(string name, string value) {
        if(name.length == 0)
            return value;

        name = unCamelCase(name);
        auto r = rules();
        r[name] = value;

        // assemble the replacement text first, then store it in one go
        string rebuilt = "";
        foreach(key, val; r) {
            if(rebuilt.length)
                rebuilt ~= " ";
            rebuilt ~= key ~ ": " ~ val ~ ";";
        }
        _attribute = rebuilt;
        return value;
    }

    /// Looks up one rule by (camelCase or dashed) name; null when it is not present.
    string get(string name) const {
        name = unCamelCase(name);
        auto r = rules();
        if(auto found = name in r)
            return *found;
        return null;
    }

    /// Splits the attribute on ';' and each item on its first ':' into a name => value map.
    /// An item without a colon becomes a name mapped to the empty string.
    string[string] rules() const {
        string[string] ret;
        foreach(piece; _attribute().split(";")) {
            auto rule = piece.strip();
            if(rule.length == 0)
                continue;

            auto colon = rule.indexOf(":");
            if(colon == -1) {
                ret[rule] = "";
                continue;
            }

            auto name = rule[0 .. colon].strip;
            auto value = rule[colon + 1 .. $].strip;
            ret[name] = value;
        }
        return ret;
    }

    mixin JavascriptStyleDispatch!();
}

///.
enum NodeType {
    Text = 3
}

/// You can use this to do an easy null check or a dynamic cast+null check on any element.
// Casts `e` to T (Element or a subclass) and returns it.
// Throws ElementNotFoundException when the cast yields null, which includes the
// case where `e` itself is null. `file` and `line` default to the call site and
// are forwarded to the exception.
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
    in {}
    out(ret) { assert(ret !is null); }
body {
    auto ret = cast(T) e;
    if(ret is null)
        throw new ElementNotFoundException(T.stringof, "passed value", file, line);
    return ret;
}

/// This represents almost everything in the DOM.
class Element {
    // this ought to be private. don't use it directly.
    Element[] children;

    /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
    string tagName;

    /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
    string[string] attributes;

    /// In XML, it is valid to write a self-closing tag for all elements with no children, but that breaks HTML, so I don't do it here.
    /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
    private bool selfClosed;

    /// Get the parent Document object that contains this element.
    /// It may be null, so remember to check for that.
    Document parentDocument;

    ///.
    Element parentNode;

    /// Convenience function to try to do the right thing for HTML. This is the main
    /// way I create elements.
    ///
    /// The first switch picks the class to instantiate from the tag name; the second
    /// (only when childInfo is not null) decides what childInfo and childInfo2 mean
    /// for that tag: a source/href/name attribute, inner text, raw source, and so on.
    /// For unlisted tags childInfo is the inner text and childInfo2 the class attribute.
    /// The result has no parent document.
    static Element make(string tagName, string childInfo = null, string childInfo2 = null) {
        bool selfClosed = tagName.isInArray(selfClosedElements);

        Element e;
        // want to create the right kind of object for the given tag...
        switch(tagName) {
            case "table":
                e = new Table(null);
            break;
            case "a":
                e = new Link(null);
            break;
            case "form":
                e = new Form(null);
            break;
            case "tr":
                e = new TableRow(null);
            break;
            case "td", "th":
                e = new TableCell(null, tagName);
            break;
            default:
                e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
        }

        // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
        e.tagName = tagName;
        e.selfClosed = selfClosed;

        if(childInfo !is null)
            switch(tagName) {
                /* html5 convenience tags */
                case "audio":
                    // childInfo is the source url (skipped when empty), childInfo2 the fallback text
                    if(childInfo.length)
                        e.addChild("source", childInfo);
                    if(childInfo2 !is null)
                        e.appendText(childInfo2);
                break;
                case "source":
                    e.src = childInfo;
                    if(childInfo2 !is null)
                        e.type = childInfo2;
                break;
                /* regular html 4 stuff */
                case "img":
                    e.src = childInfo;
                    if(childInfo2 !is null)
                        e.alt = childInfo2;
                break;
                case "link":
                    e.href = childInfo;
                    if(childInfo2 !is null)
                        e.rel = childInfo2;
                break;
                case "option":
                    e.innerText = childInfo;
                    if(childInfo2 !is null)
                        e.value = childInfo2;
                break;
                case "input":
                    // inputs made this way are always hidden name/value pairs
                    e.type = "hidden";
                    e.name = childInfo;
                    if(childInfo2 !is null)
                        e.value = childInfo2;
                break;
                case "a":
                    e.innerText = childInfo;
                    if(childInfo2 !is null)
                        e.href = childInfo2;
                break;
                case "script":
                case "style":
                    // stored through innerRawSource rather than innerText; childInfo2 is ignored
                    e.innerRawSource = childInfo;
                break;
                case "meta":
                    e.name = childInfo;
                    if(childInfo2 !is null)
                        e.content = childInfo2;
                break;
                /* generically, assume we were passed text and perhaps class */
                default:
                    e.innerText = childInfo;
                    // note: tested with .length, so an empty string sets no class
                    if(childInfo2.length)
                        e.className = childInfo2;
            }

        return e;
    }

    /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
    this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
        parentDocument = _parentDocument;
        tagName = _tagName;
        // a null table leaves the default (empty) attributes in place
        if(_attributes !is null)
            attributes = _attributes;
        selfClosed = _selfClosed;
    }

    /// Convenience constructor when you don't care about the parentDocument.
/// Note: building an element without a parent document might break things on the document.
/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
this(string _tagName, string[string] _attributes = null) {
    tagName = _tagName;
    if(_attributes !is null)
        attributes = _attributes;
    selfClosed = tagName.isInArray(selfClosedElements);

    // this is meant to reserve some memory. It makes a small, but consistent improvement.
    //children.length = 8;
    //children.length = 0;
}

// Sets only the parent document; tag name and attributes keep their defaults.
private this(Document _parentDocument) {
    parentDocument = _parentDocument;
}

/* *******************************
        Navigating the DOM
*********************************/

/// Returns the first child of this element. If it has no children, returns null.
/// Remember, text nodes are children too.
@property Element firstChild() {
    return children.length ? children[0] : null;
}

/// Returns the last child of this element, or null if it has no children.
@property Element lastChild() {
    return children.length ? children[$ - 1] : null;
}

/// Returns the nearest sibling that comes before this element in its parent.
///
/// With tagName null any node qualifies (text nodes included); with "*" any
/// node that is not a text node qualifies; otherwise the sibling's tagName
/// must equal the argument. Returns null when there is no parent or no match.
@property Element previousSibling(string tagName = null) {
    if(this.parentNode is null)
        return null;

    Element ps = null;
    foreach(e; this.parentNode.childNodes) {
        if(e is this)
            break;
        // Keep scanning after a hit: the answer is the LAST match before `this`.
        // (Stopping at the first "*" match returned the earliest sibling instead
        // of the nearest one.)
        if(tagName == "*" && e.nodeType != NodeType.Text)
            ps = e;
        else if(tagName is null || e.tagName == tagName)
            ps = e;
    }
    return ps;
}

/// Returns the nearest sibling that comes after this element in its parent,
/// using the same tagName rules as previousSibling. Returns null when there
/// is no parent or no match.
@property Element nextSibling(string tagName = null) {
    if(this.parentNode is null)
        return null;

    Element ns = null;
    bool mightBe = false; // becomes true once we have walked past `this`
    foreach(e; this.parentNode.childNodes) {
        if(e is this) {
            mightBe = true;
            continue;
        }
        if(mightBe) {
            if(tagName == "*" && e.nodeType != NodeType.Text) {
                ns = e;
                break;
            }
            if(tagName is null || e.tagName == tagName) {
                ns = e;
                break;
            }
        }
    }
    return ns;
}

/// Gets the nearest node, going up the chain, with the given tagName
/// May return null or throw.
/// When tagName is null it is inferred from T for Form ("form"), Table ("table")
/// and Link ("a"); for any other T a null tagName accepts the direct parent.
/// With T == Element the result may be null; for any other T an
/// ElementNotFoundException is thrown when no ancestor matches or the match
/// is not actually a T.
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
    if(tagName is null) {
        static if(is(T == Form))
            tagName = "form";
        else static if(is(T == Table))
            tagName = "table";
        else static if(is(T == Link))
            tagName = "a"; // was a second Table test with a no-op `==` comparison
    }

    auto par = this.parentNode;
    while(par !is null) {
        if(tagName is null || par.tagName == tagName)
            break;
        par = par.parentNode;
    }

    // static if introduces no scope, so `t` is visible to the return below
    static if(!is(T == Element)) {
        auto t = cast(T) par;
        if(t is null)
            throw new ElementNotFoundException("", tagName ~ " parent not found");
    } else
        auto t = par;

    return t;
}

/// Returns the first element yielded by `tree` whose id attribute equals `id`,
/// or null when there is none.
Element getElementById(string id) {
    // FIXME: I use this function a lot, and it's kinda slow
    // not terribly slow, but not great.
    foreach(e; tree)
        if(e.id == id)
            return e;
    return null;
}

/// Like getElementById, but casts the result to SomeElementType and throws
/// ElementNotFoundException instead of returning null (also when the element
/// exists but is of a different class).
final SomeElementType requireElementById(SomeElementType = Element)(string id)
    if( is(SomeElementType : Element) )
    out(ret) { assert(ret !is null); }
body {
    auto e = cast(SomeElementType) getElementById(id);
    if(e is null)
        throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id);
    return e;
}

/// Like querySelector, but casts the result to SomeElementType and throws
/// ElementNotFoundException instead of returning null.
final SomeElementType requireSelector(SomeElementType = Element)(string selector)
    if( is(SomeElementType : Element) )
    out(ret) { assert(ret !is null); }
body {
    auto e = cast(SomeElementType) querySelector(selector);
    if(e is null)
        throw new ElementNotFoundException(SomeElementType.stringof, selector);
    return e;
}

/// Note: you can give multiple selectors, separated by commas.
/// It will return the first match it finds.
Element querySelector(string selector) {
    // FIXME: inefficient; it gets all results just to discard most of them
    auto list = getElementsBySelector(selector);
    if(list.length == 0)
        return null;
    return list[0];
}

/// a more standards-compliant alias for getElementsBySelector
Element[] querySelectorAll(string selector) {
    return getElementsBySelector(selector);
}

/**
    Does a CSS selector

    * -- all, default if nothing else is there

    tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector

    It is all additive

    OP
        space = descendant
        >     = direct descendant
        +     = sibling (E+F Matches any F element immediately preceded by a sibling element E)

    [foo]              Foo is present as an attribute
    [foo="warning"]    Matches any E element whose "foo" attribute value is exactly equal to "warning".
    E[foo~="warning"]  Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
    E[lang|="en"]      Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".

    [item$=sdas]  ends with
    [item^=sdsad] begins with

    Quotes are optional here.

    Pseudos:
        :first-child
        :last-child
        :link (same as a[href] for our purposes here)

    There can be commas separating the selector. A comma separated list result is OR'd onto the main.

    This ONLY cares about elements. text, etc, are ignored

    There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
    // FIXME: this function could probably use some performance attention
    // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

    // POSSIBLE FIXME: this also sends attribute things to lower in the selector,
    // but the actual get selector check is still case sensitive...
    if(parentDocument && parentDocument.loose)
        selector = selector.toLower;

    // each comma-separated part is matched on its own and the results are concatenated
    Element[] ret;
    foreach(sel; parseSelectorString(selector))
        ret ~= sel.getElements(this);
    return ret;
}

/// Returns the elements matching the selector "." ~ cn.
Element[] getElementsByClassName(string cn) {
    // is this correct?
    return getElementsBySelector("." ~ cn);
}

/// Returns every element yielded by `tree` whose tagName equals `tag`
/// (`tag` is lower-cased first when the parent document is in loose mode).
Element[] getElementsByTagName(string tag) {
    if(parentDocument && parentDocument.loose)
        tag = tag.toLower();

    Element[] ret;
    foreach(e; tree)
        if(e.tagName == tag)
            ret ~= e;
    return ret;
}

/* *******************************
          Attributes
*********************************/

/**
    Gets the given attribute value, or null if the
    attribute is not set.

    Note that the returned string is decoded, so it no longer contains any xml entities.
*/
string getAttribute(string name) const {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();
    auto e = name in attributes;
    if(e)
        return *e;
    else
        return null;
}

/**
    Sets an attribute. Returns this for easy chaining

    For href and src, a leading "vbscript:" or "javascript:" scheme (matched
    case-insensitively, ignoring leading whitespace) is cut off before storing.
*/
Element setAttribute(string name, string value) {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();

    // script urls are never used legitimately here, so they are neutralized
    auto it = name.toLower;
    if(it == "href" || it == "src") {
        // Test and slice the SAME left-trimmed string. Testing a stripped copy but
        // slicing the untrimmed value put the cut in the wrong place, and with 11 or
        // more leading whitespace characters left the whole script url intact.
        // The prefixes are ASCII, so for ASCII input their length in `v` equals
        // their length in `trimmed`.
        auto trimmed = value.stripLeft;
        auto v = trimmed.toLower();
        if(v.startsWith("vbscript:"))
            value = trimmed[9 .. $];
        if(v.startsWith("javascript:"))
            value = trimmed[11 .. $];
    }

    attributes[name] = value;
    return this;
}

/**
    Returns if the attribute exists.
*/
bool hasAttribute(string name) {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();
    return (name in attributes) !is null;
}

/**
    Removes the given attribute from the element.
*/
void removeAttribute(string name) {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();
    if(name in attributes)
        attributes.remove(name);
}

/**
    Gets the class attribute's contents. Returns
    an empty string if it has no class.
*/
string className() const {
    auto c = getAttribute("class");
    if(c is null)
        return "";
    return c;
}

/// Replaces the whole class attribute with `c`; returns this element for chaining.
Element className(string c) {
    setAttribute("class", c);
    return this;
}

/**
    Provides easy access to attributes, object style.

    auto a = Element.make("a");
    a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
    string where = a.href; // same as a.getAttribute("href");

    A null argument means "just read", so null can never be stored this way.
*/
// name != "popFront" is so duck typing doesn't think it's a range
string opDispatch(string name)(string v = null) if(name != "popFront") {
    if(v !is null)
        setAttribute(name, v);
    return getAttribute(name);
}

/**
    Returns the element's children.
*/
@property const(Element[]) childNodes() const {
    return children;
}

/// Mutable version of the same
@property Element[] childNodes() { // FIXME: the above should be inout
    return children;
}

/// Adds a string to the class attribute. The class attribute is used a lot in CSS.
/// The string is appended after a space (or becomes the whole attribute when there
/// is none yet); no check is made for it being present already.
Element addClass(string c) {
    string cn = getAttribute("class");
    if(cn is null)
        setAttribute("class", c);
    else
        setAttribute("class", cn ~ " " ~ c);
    return this;
}

/// Removes a particular class name.
///
/// Only whole, space-separated names equal to `c` are removed; the remaining
/// names are written back joined by single spaces. Returns this for chaining.
Element removeClass(string c) {
    auto cn = className;

    // Compare token by token. A plain substring replace would also mangle other
    // names that merely contain `c` (removing "btn" from "btn btn-primary").
    string[] kept;
    foreach(cla; cn.split(" "))
        if(cla.length && cla != c)
            kept ~= cla;

    className = kept.join(" ");
    return this;
}

/// Returns whether the given class appears in this element.
bool hasClass(string c) {
    auto cn = className;

    // cheap rejection first: if it is not even a substring it cannot be a class
    auto idx = cn.indexOf(c);
    if(idx == -1)
        return false;

    foreach(cla; cn.split(" "))
        if(cla == c)
            return true;
    return false;

    /*
    int rightSide = idx + c.length;

    bool checkRight() {
        if(rightSide == cn.length)
            return true; // it's the only class
        else if(iswhite(cn[rightSide]))
            return true;
        return false; // this is a substring of something else..
    }

    if(idx == 0) {
        return checkRight();
    } else {
        if(!iswhite(cn[idx - 1]))
            return false; // substring
        return checkRight();
    }

    assert(0);
    */
}

/// HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
///
/// Given:
///
/// We get: assert(a.dataset.myProperty == "cool");
DataSet dataset() {
    return DataSet(this);
}

/// Provides both string and object style (like in Javascript) access to the style attribute.
@property ElementStyle style() {
    return ElementStyle(this);
}

/// This sets the style attribute with a string.
@property ElementStyle style(string s) {
    this.setAttribute("style", s);
    return this.style();
}

// Currently a no-op: the whole body is commented out.
private void parseAttributes(string[] whichOnes = null) {
/+
    if(whichOnes is null)
        whichOnes = attributes.keys;
    foreach(attr; whichOnes) {
        switch(attr) {
            case "id":

            break;
            case "class":

            break;
            case "style":

            break;
            default:
                // we don't care about it
        }
    }
+/
}

// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Builds (once, then caches) a CssStyle from the style attribute plus the
/// presentational attributes width, height, bgcolor, color and - on body
/// only - text, each appended as the matching css rule.
@property CssStyle computedStyle() {
    if(_computedStyle is null) {
        auto style = this.getAttribute("style");

        /* we'll treat old presentational html attributes as css here */
        if(this.hasAttribute("width"))
            style ~= "; width: " ~ this.width;
        if(this.hasAttribute("height"))
            style ~= "; height: " ~ this.height; // used to be emitted as a second "width" rule
        if(this.hasAttribute("bgcolor"))
            style ~= "; background-color: " ~ this.bgcolor;
        if(this.tagName == "body" && this.hasAttribute("text"))
            style ~= "; color: " ~ this.text;
        if(this.hasAttribute("color"))
            style ~= "; color: " ~ this.color;
        /* done */

        _computedStyle = new CssStyle(null, style); // gives at least something to work with
    }
    return _computedStyle;
}

// cache for computedStyle; filled on first access and not refreshed here afterwards
private CssStyle _computedStyle;

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
    void* expansionHook; ///ditto
    int offsetWidth; ///ditto
    int offsetHeight; ///ditto
    int offsetLeft; ///ditto
    int offsetTop; ///ditto
    Element offsetParent; ///ditto
    bool hasLayout; ///ditto
    int zIndex; ///ditto

    ///ditto
    int absoluteLeft() {
        // own offset plus the offsets of every offsetParent up the chain
        int a = offsetLeft;
        auto p = offsetParent;
        while(p) {
            a += p.offsetLeft;
            p = p.offsetParent;
        }
        return a;
    }

    ///ditto
    int absoluteTop() {
        int a = offsetTop;
        auto p = offsetParent;
        while(p) {
            a += p.offsetTop;
            p = p.offsetParent;
        }
        return a;
    }
}

// Back to the regular dom functions

public:

/* *******************************
          DOM Mutation
*********************************/

/// Removes all inner content from the tag; all child text and elements are gone.
void removeAllChildren()
    out {
        assert(this.children.length == 0);
    }
body {
    children = null;
}

/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an element
/// instead of inner text)
///
/// The child is built with Element.make and appended; the new child is returned.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
    in {
        assert(tagName !is null);
    }
    out(e) {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
    }
body {
    auto e = Element.make(tagName, childInfo, childInfo2);
    // FIXME (maybe): if the thing is self closed, we might want to go ahead and
    // return the parent. That will break existing code though.
    return appendChild(e);
}

/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
    in {
        assert(tagName !is null);
        assert(parentNode !is null);
    }
    out(e) {
        assert(e.parentNode is this.parentNode);
        assert(e.parentDocument is this.parentDocument);
    }
body {
    auto e = Element.make(tagName, childInfo, childInfo2);
    return parentNode.insertAfter(this, e);
}

/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
    // local imports: nothing outside this template needs these names
    import std.conv : to;
    import std.traits : isSomeString;

    foreach(item; t) {
        // `item` is a value, so its TYPE has to be inspected via typeof(item).
        // Testing `is(item : Element)` is never true for a value, which made
        // every instantiation fall through to the static assert.
        static if(is(typeof(item) : Element))
            appendChild(item);
        else static if(isSomeString!(typeof(item)))
            appendText(to!string(item));
        else
            static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
    }
}

/// Creates a `tagName` element through the parent document, puts `firstChild`
/// inside it and appends the new element to this one. Returns the new element.
/// Requires this element to have a parent document.
Element addChild(string tagName, Element firstChild)
    in {
        assert(parentDocument !is null);
        assert(firstChild !is null);
    }
    out(ret) {
        assert(ret !is null);
        assert(ret.parentNode is this);
        assert(firstChild.parentNode is ret);

        assert(ret.parentDocument is this.parentDocument);
        assert(firstChild.parentDocument is this.parentDocument);
    }
body {
    auto e = parentDocument.createElement(tagName);
    e.appendChild(firstChild);
    this.appendChild(e);
    return e;
}

/// Creates a `tagName` element with Element.make, appends it to this element
/// and then sets its innerHTML from `innerHtml.source`. Returns the new element.
Element addChild(string tagName, Html innerHtml)
    in {
    }
    out(ret) {
        assert(ret !is null);
        assert(ret.parentNode is this);
        assert(ret.parentDocument is this.parentDocument);
    }
body {
    auto e = Element.make(tagName);
    // appended before parsing, so the element already has its parent document
    // when the parsed children are attached to it
    this.appendChild(e);
    e.innerHTML = innerHtml.source;
    return e;
}

/// Appends the given element to this one. The given element must not have a parent already.
/// Clears this element's selfClosed flag and returns the appended element.
Element appendChild(Element e)
    in {
        assert(e !is null);
        assert(e.parentNode is null);
    }
    out (ret) {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
        assert(e is ret);
    }
body {
    selfClosed = false; // an element with children cannot be written self-closed
    e.parentNode = this;
    e.parentDocument = this.parentDocument;
    children ~= e;
    return e;
}

/// Appends every element of the given array, in order, with appendChild.
void appendChildren(Element[] children) {
    // note: the parameter hides the member of the same name inside this function
    foreach(i; 0 .. children.length)
        appendChild(children[i]);
}

/// Inserts `what` into this node's children directly in front of `where`.
/// `where` must be a child of this node and `what` must be parentless.
/// Returns `what`.
Element insertBefore(in Element where, Element what)
    in {
        assert(where !is null);
        assert(where.parentNode is this);
        assert(what !is null);
        assert(what.parentNode is null);
    }
    out (ret) {
        assert(where.parentNode is this);
        assert(what.parentNode is this);

        assert(what.parentDocument is this.parentDocument);
        assert(ret is what);
    }
body {
    foreach(idx, candidate; children) {
        if(candidate !is where)
            continue;

        auto head = children[0 .. idx];
        auto tail = children[idx .. $];
        children = head ~ what ~ tail;

        what.parentDocument = this.parentDocument;
        what.parentNode = this;
        break;
    }
    // reached with or without a hit; a miss leaves the child list untouched
    return what;
}

/// Inserts `what` into this node's children directly behind `where`.
/// Same requirements and return value as insertBefore.
Element insertAfter(in Element where, Element what)
    in {
        assert(where !is null);
        assert(where.parentNode is this);
        assert(what !is null);
        assert(what.parentNode is null);
    }
    out (ret) {
        assert(where.parentNode is this);
        assert(what.parentNode is this);
        assert(what.parentDocument is this.parentDocument);
        assert(ret is what);
    }
body {
    foreach(idx, candidate; children) {
        if(candidate !is where)
            continue;

        auto head = children[0 .. idx + 1];
        auto tail = children[idx + 1 .. $];
        children = head ~ what ~ tail;

        what.parentNode = this;
        what.parentDocument = this.parentDocument;
        break;
    }
    // reached with or without a hit; a miss leaves the child list untouched
    return what;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement)
    in {
        assert(child !is null);
        assert(replacement !is null);
        assert(child.parentNode is this);
    }
    out(ret) {
        assert(ret is child);
        assert(ret.parentNode is null);
        assert(replacement.parentNode is this);
        assert(replacement.parentDocument is this.parentDocument);
    }
body {
    foreach(idx, current; this.children) {
        if(current !is child)
            continue;

        child.parentNode = null;
        this.children[idx] = replacement;
        replacement.parentNode = this;
        replacement.parentDocument = this.parentDocument;
        return child;
    }
    // the in contract says child is ours, so not finding it is a broken tree
    assert(0);
}

/// Wraps `text` in a new TextNode, appends it and returns the appended node.
Element appendText(string text) {
    return appendChild(new TextNode(parentDocument, text));
}

///.
// Returns a new array holding the children whose nodeType equals 1.
// NOTE(review): 1 is compared literally; the NodeType enum in this part of the
// file only names Text = 3. Presumably 1 is the element node type - confirm.
@property Element[] childElements() {
    Element[] ret;
    foreach(c; children)
        if(c.nodeType == 1)
            ret ~= c;
    return ret;
}

/// Appends the given html to the element, returning the elements appended
Element[] appendHtml(string html) {
    // NOTE(review): the two empty literals around html look like wrapper markup
    // that was lost from this source - confirm against the original file.
    Document d = new Document("" ~ html ~ "");
    // the parsed root's children are moved over to this element
    return stealChildren(d.root);
}

/// Moves this element from its current parent to the end of newParent's children.
/// Requires a current parent.
void reparent(Element newParent)
    in {
        assert(newParent !is null);
        assert(parentNode !is null);
    }
    out {
        assert(this.parentNode == newParent);
        assert(isInArray(this, newParent.children));
    }
body {
    // detach first: appendChild asserts that the element has no parent
    parentNode.removeChild(this);
    newParent.appendChild(this);
}

/// Inserts `child` into this element's children directly behind `where`.
/// Note the argument order: the new child comes first, the anchor second.
/// Unlike insertAfter, the in contract does not require `child` to be parentless.
void insertChildAfter(Element child, Element where)
    in {
        assert(child !is null);
        assert(where !is null);
        assert(where.parentNode is this);
        assert(!selfClosed);
        assert(isInArray(where, children));
    }
    out {
        assert(child.parentNode is this);
        assert(where.parentNode is this);
        assert(isInArray(where, children));
        assert(isInArray(child, children));
    }
body {
    foreach(i, c; children) {
        if(c is where) {
            i++; // step past the anchor so the split point is right behind it
            children = children[0..i] ~ child ~ children[i..$];
            child.parentNode = this;
            child.parentDocument = this.parentDocument;
            break;
        }
    }
}

/// Moves all children of `e` into this element and returns them.
/// With position null they are appended at the end; otherwise they are inserted
/// directly in front of `position`, which must be one of this element's children.
/// Afterwards `e` has no children.
Element[] stealChildren(Element e, Element position = null)
    in {
        assert(!selfClosed);
        assert(e !is null);
        if(position !is null)
            assert(isInArray(position, children));
    }
    out (ret) {
        assert(e.children.length == 0);
        debug foreach(child; ret) {
            assert(child.parentNode is this);
            assert(child.parentDocument is this.parentDocument);
        }
    }
body {
    // re-home the children before splicing them in
    foreach(c; e.children) {
        c.parentNode = this;
        c.parentDocument = this.parentDocument;
    }
    if(position is null)
        children ~= e.children;
    else {
        foreach(i, child; children) {
            if(child is position) {
                children = children[0..i] ~ e.children ~ children[i..$];
                break;
            }
        }
    }

    // copy before emptying e.children, so the caller gets its own array
    auto ret = e.children.dup;
    e.children.length = 0;

    return ret;
}

/// Puts the current element first in our children list. The given element must not have a parent already.
// Returns the element that was prepended.
Element prependChild(Element e)
    in {
        assert(e.parentNode is null);
        assert(!selfClosed);
    }
    out {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
        assert(children[0] is e);
    }
body {
    e.parentNode = this;
    e.parentDocument = this.parentDocument;
    children = e ~ children;
    return e;
}

/**
    Returns a string containing all child elements, formatted such that it could be pasted into
    an XML file.

    The children are written into `where`; only the part added by this call is returned,
    so an appender that already holds data can be passed in.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
    if(children is null)
        return "";

    // remember where our output begins in case the appender was not empty
    auto start = where.data.length;

    foreach(child; children) {
        assert(child !is null);
        child.writeToAppender(where);
    }

    return where.data[start .. $];
}

/**
    Takes some html and replaces the element's children with the tree made from the string.

    An empty string just removes all children. Any non-empty string clears selfClosed.
*/
@property void innerHTML(string html) {
    if(html.length)
        selfClosed = false;

    if(html.length == 0) {
        // I often say innerHTML = ""; as a shortcut to clear it out,
        // so let's optimize that slightly.
        removeAllChildren();
        return;
    }

    auto doc = new Document();
    // NOTE(review): the two empty literals around html look like wrapper markup
    // that was lost from this source - confirm against the original file.
    doc.parse("" ~ html ~ ""); // FIXME: this should preserve the strictness of the parent document

    // adopt the parsed root's children as our own
    children = doc.root.children;
    foreach(c; children) {
        c.parentNode = this;
        c.parentDocument = this.parentDocument;
    }

    // point everything yielded by `tree` at our document, not the temporary one
    reparentTreeDocuments();

    // the temporary document must not keep referring to nodes we now own
    doc.root.children = null;
}

/// ditto
@property void innerHTML(Html html) {
    this.innerHTML = html.source;
}

// Sets parentDocument on every element yielded by `tree` to this element's document.
private void reparentTreeDocuments() {
    foreach(c; this.tree)
        c.parentDocument = this.parentDocument;
}

/**
    Replaces this node with the given html string, which is parsed

    Note: this invalidates the this reference, since it is removed
    from the tree.

    Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
    auto doc = new Document();
    // NOTE(review): the two empty literals around html look like wrapper markup
    // that was lost from this source - confirm against the original file.
    doc.parse("" ~ html ~ ""); // FIXME: needs to preserve the strictness

    // first take the parsed nodes as our own children ...
    children = doc.root.children;
    foreach(c; children) {
        c.parentNode = this;
        c.parentDocument = this.parentDocument;
    }

    reparentTreeDocuments();

    // ... then remove ourselves via stripOut (defined elsewhere in this file);
    // presumably it leaves those children in our place - confirm there
    stripOut();

    return doc.root.children;
}

/// Returns all the html for this element, including the tag itself.
/// This is equivalent to calling toString().
@property string outerHTML() {
    return this.toString();
}

/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for