dephobosify as step toward making compile times great again

2025-09-07 10:28:47 -04:00 · 2025-09-07 10:28:47 -04:00 · f6b5bb9def
parent 13a767fd31
commit f6b5bb9def
1 changed files with 303 additions and 47 deletions
--- a/dom.d
+++ b/dom.d
@ -47,6 +47,10 @@
 	implementations =
 	These provide implementations of other functionality.
 	History:
 		The `toString` methods used to optionally take a Phobos `appender`,
 		but now it takes a private internal implementation as of August 26, 2025. This may change again.
 +/
 module arsd.dom;
@ -541,6 +545,7 @@ class Document : FileResource, DomParent {
 			dataEncoding = dataEncoding.replace("-", "");
 			dataEncoding = dataEncoding.replace("_", "");
 			if(dataEncoding == "utf8") {
 				import std.utf;
 				try {
 					validate(rawdata);
 				} catch(UTFException e) {
@ -700,7 +705,7 @@ class Document : FileResource, DomParent {
 		}
 		void parseError(string message) {
-			throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message));
+			throw new MarkupException("char "~to!string(pos)~" (line "~to!string(getLineNumber(pos))~"): " ~ message);
 		}
 		bool eatWhitespace() {
@ -860,7 +865,8 @@ class Document : FileResource, DomParent {
 				return Ele(0, readTextNode(), null);
 			}
-			enforce(data[pos] == '<');
+			if(data[pos] != '<')
 				throw new MarkupException("expected < not " ~ data[pos]);
 			pos++;
 			if(pos == data.length) {
 				if(strict)
@ -1113,10 +1119,9 @@ class Document : FileResource, DomParent {
 									selfClosed = true;
 						}
 						import std.algorithm.comparison;
 						if(strict) {
-						enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - data.contextToKeep) .. min(data.length, pos + data.contextToKeep)]));
+							if(data[pos] != '>')
 								throw new MarkupException("got "~data[pos]~" when expecting > (possible missing attribute name)\nContext:\n" ~ data[max(0, pos - data.contextToKeep) .. min(data.length, pos + data.contextToKeep)]);
 						} else {
 							// if we got here, it's probably because a slash was in an
 							// unquoted attribute - don't trust the selfClosed value
@ -1166,7 +1171,7 @@ class Document : FileResource, DomParent {
 								else
 									ending = indexOf(data[pos..$], closer);
-								ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes));
+								ending = indexOf(data[pos..$], closer, (loose ? false : true));
 								/*
 								if(loose && ending == -1 && pos < data.length)
 									ending = indexOf(data[pos..$], closer.toUpper());
@ -1225,7 +1230,7 @@ class Document : FileResource, DomParent {
 								bool found = false;
 								if(n.payload != tagName) {
 									if(strict)
-										parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
+										parseError("mismatched tag: </"~n.payload~"> != <"~tagName~"> (opened on line "~to!string(getLineNumber(whereThisTagStarted))~")");
 									else {
 										sawImproperNesting = true;
 										// this is so we don't drop several levels of awful markup
@ -1721,7 +1726,6 @@ class Document : FileResource, DomParent {
 		because whitespace may be significant content in XML.
 	+/
 	string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
 		import std.string;
 		string s = prolog.strip;
 		/*
@ -1872,6 +1876,7 @@ unittest {
 unittest {
 	auto document = new Document(`<html>
 		<p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p>
 		<p>&#164; is the same thing.</p>
 		<p>&curren; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
 		<p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p>
 	</html>`, true, true); // strict mode turned on
@ -1881,15 +1886,17 @@ unittest {
 	// no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string
 	assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output.");
 	// but note on the second paragraph, the entity has been converted to the appropriate *character* in the object
-	assert(paragraphs[1].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.");
+	assert(paragraphs[1].textContent == "¤ is the same thing.");
 	assert(paragraphs[2].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.");
 	// and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The txt content shows the text as a plain string:
-	assert(paragraphs[2].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.");
+	assert(paragraphs[3].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.");
 	// and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing.
-	assert(paragraphs[2].childNodes.length == 1 && paragraphs[2].childNodes[0].nodeType == NodeType.Text);
+	assert(paragraphs[3].childNodes.length == 1 && paragraphs[3].childNodes[0].nodeType == NodeType.Text);
 	// And now, in the output string, we can see they are normalized thusly:
 	assert(document.toString() == "<!DOCTYPE html>\n<html>
 		<p>&#164; is a non-ascii character. It will be converted to a numbered entity in string output.</p>
 		<p>&#164; is the same thing.</p>
 		<p>&#164; is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p>
 		<p>xml cdata segments, which can contain &lt;tag&gt; looking things, are converted to encode the embedded special-to-xml characters to entities too.</p>
 	</html>");
@ -4144,8 +4151,7 @@ class Element : DomParent {
 		// i sort these for consistent output. might be more legible
 		// but especially it keeps it the same for diff purposes.
-		import std.algorithm : sort;
+		auto keys = sortStrings(attributes.keys);
 		auto keys = sort(attributes.keys);
 		foreach(n; keys) {
 			auto v = attributes[n];
 			s ~= " ";
@ -4398,8 +4404,6 @@ unittest {
 import std.string;
 /* domconvenience follows { */
 /// finds comments that match the given txt. Case insensitive, strips whitespace.
@ -5059,11 +5063,6 @@ string camelCase(string a) {
 // I need to maintain compatibility with the way it is now too.
 import std.string;
 import std.exception;
 import std.array;
 import std.range;
 //import std.stdio;
 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
@ -5209,7 +5208,7 @@ string htmlEntitiesEncode(string data, Appender!string output = appender!string(
 		else if (!encodeNonAscii || (d < 128 && d > 0))
 			output.put(d);
 		else
-			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
+			output.put("&#" ~ to!string(cast(int) d) ~ ";");
 	}
 	//assert(output !is null); // this fails on empty attributes.....
@ -5286,7 +5285,7 @@ dchar parseEntity(in dchar[] entity) {
 						return ' '; // this is really broken html
 					// done with dealing with broken stuff
-					auto p = std.conv.to!int(decimal);
+					auto p = to!int(decimal);
 					return cast(dchar) p;
 				}
 			} else
@ -5317,9 +5316,6 @@ unittest {
 	assert(parseEntity("&zwnj;"d) == '\u200c');
 }
 import std.utf;
 import std.stdio;
 /// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
 /// By default, it uses loose mode - it will try to return a useful string from garbage input too.
 /// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
@ -5369,7 +5365,7 @@ string htmlEntitiesDecode(string data, bool strict = false) {
 						foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now
 							a ~= c;
 					} else {
-						a ~= buffer[0.. std.utf.encode(buffer, ch2)];
+						a ~= buffer[0.. utf_encode(buffer, ch2)];
 					}
 				}
@ -5378,7 +5374,7 @@ string htmlEntitiesDecode(string data, bool strict = false) {
 			} else
 			if(ch == ';') {
 				tryingEntity = false;
-				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
+				a ~= buffer[0.. utf_encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
 			} else if(ch == ' ') {
 				// e.g. you &amp i
 				if(strict)
@ -5386,7 +5382,7 @@ string htmlEntitiesDecode(string data, bool strict = false) {
 				else {
 					tryingEntity = false;
 					a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]);
-					a ~= buffer[0 .. std.utf.encode(buffer, ch)];
+					a ~= buffer[0 .. utf_encode(buffer, ch)];
 				}
 			} else {
 				if(tryingNumericEntity) {
@ -5403,7 +5399,7 @@ string htmlEntitiesDecode(string data, bool strict = false) {
 							if(strict)
 								throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength]));
 							tryingEntity = false;
-							a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
+							a ~= buffer[0.. utf_encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))];
 							a ~= ch;
 							continue;
 						}
@ -5431,7 +5427,7 @@ string htmlEntitiesDecode(string data, bool strict = false) {
 				entityBeingTried[entityBeingTriedLength++] = ch;
 				entityAttemptIndex = 0;
 			} else {
-				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
+				a ~= buffer[0 .. utf_encode(buffer, ch)];
 			}
 		}
 	}
@ -5781,9 +5777,8 @@ class TextNode : Element {
 		}
 		auto e = htmlEntitiesEncode(contents);
 		import std.algorithm.iteration : splitter;
 		bool first = true;
-		foreach(line; splitter(e, "\n")) {
+		foreach(line; LineSplitter(e)) {
 			if(first) {
 				s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith);
 				first = false;
@ -6231,8 +6226,6 @@ class Form : Element {
 +/
 }
 import std.conv;
 /++
 	Represents a HTML table. Has some convenience methods for working with tabular data.
 +/
@ -6619,8 +6612,6 @@ private immutable static string[] htmlInlineElements = [
 ];
 static import std.conv;
 /// helper function for decoding html entities
 int intFromHex(string hex) {
 	int place = 1;
@ -7074,7 +7065,7 @@ int intFromHex(string hex) {
 				if(e.parentNode is null)
 					return false;
-				auto among = retro(e.parentNode.childElements(e.tagName));
+				auto among = Retro!Element(e.parentNode.childElements(e.tagName));
 				if(!a.solvesFor(among, e))
 					return false;
@ -7136,7 +7127,7 @@ int intFromHex(string hex) {
 		}
 		string toString() {
-			return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of);
+			return (to!string(multiplier) ~ "n" ~ (adder >= 0 ? "+" : "") ~ to!string(adder) ~ (of.length ? " of " : "") ~ of);
 		}
 		bool solvesFor(R)(R elements, Element e) {
@ -7331,8 +7322,7 @@ int intFromHex(string hex) {
 			about mutating the dom as you iterate through this.
 		+/
 		auto getMatchingElementsLazy(Element start, Element relativeTo = null) {
-			import std.algorithm.iteration;
+			return ElementStreamFilter(start.tree, (Element a) => this.matchesElement(a, relativeTo));
 			return start.tree.filter!(a => this.matchesElement(a, relativeTo));
 		}
@ -7413,7 +7403,7 @@ int intFromHex(string hex) {
 			+/
 			}
-			foreach(part; retro(lparts)) {
+			foreach_reverse(part; lparts) {
 				 // writeln("matching ", where, " with ", part, " via ", lastSeparation);
 				 // writeln(parts);
@ -8339,11 +8329,13 @@ final class ElementStream {
 // unbelievable.
 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time.
 sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) {
-	static import std.algorithm;
+	foreach(idx, b; haystack) {
-	auto found = std.algorithm.find(haystack, needle);
+		if(idx + needle.length > haystack.length)
-	if(found.length == 0)
+			return -1;
-		return -1;
+		if(haystack[idx .. idx + needle.length] == needle[])
-	return haystack.length - found.length;
+			return idx;
 	}
 	return -1;
 }
 private T[] insertAfter(T)(T[] arr, int position, T[] what) {
@ -8764,7 +8756,7 @@ class Event {
 		isBubbling = false;
-		foreach(e; chain.retro()) {
+		foreach(e; Retro!Element(chain)) {
 			if(eventName in e.capturingEventHandlers)
 			foreach(handler; e.capturingEventHandlers[eventName])
 				handler(e, this);
@ -9352,6 +9344,270 @@ unittest {
 	auto document = new Document("broken"); // just ensuring it doesn't crash
 }
 private long min(long a, long b) {
 	if(a < b)
 		return a;
 	return b;
 }
 private long max(long a, long b) {
 	if(a < b)
 		return b;
 	return a;
 }
 alias utf_encode = arsd.core.encodeUtf8;
 private struct Retro(T) {
 	T[] array;
 	size_t pos;
 	this(T[] array) {
 		this.array = array;
 		this.pos = array.length;
 	}
 	T front() {
 		return array[pos - 1];
 	}
 	void popFront() {
 		pos--;
 	}
 	bool empty() {
 		return pos > 0;
 	}
 }
 // import std.array; // for Appender
 private struct Appender(T : string) {
 	void put(string s) {
 		impl.data ~= s;
 	}
 	void put(char c) {
 		impl.data ~= c;
 	}
 	void put(dchar c) {
 		char[4] buffer;
 		impl.data ~= buffer[0 .. arsd.core.encodeUtf8(buffer, c)];
 	}
 	void reserve(size_t s) {
 		impl.data.reserve(s);
 	}
 	static struct Impl {
 		string data;
 	}
 	Impl* impl;
 	string data() {
 		return impl.data;
 	}
 	this(string start) {
 		impl = new Impl;
 		impl.data = start;
 	}
 }
 private Appender!string appender(T : string)() {
 	return Appender!string(null);
 }
 private string[] split(string s, string onWhat) {
 	string[] ret;
 	more:
 	auto idx = s.indexOf(onWhat);
 	if(idx == -1) {
 		ret ~= s;
 	} else {
 		ret ~= s[0 .. idx];
 		s = s[idx + onWhat.length .. $];
 		goto more;
 	}
 	return ret;
 }
 private string replace(string s, string replaceWhat, string withThis) {
 	string ret;
 	more:
 	auto idx = s.indexOf(replaceWhat);
 	if(idx == -1) {
 		ret ~= s;
 	} else {
 		ret ~= s[0 .. idx];
 		ret ~= withThis;
 		s = s[idx + replaceWhat.length .. $];
 		goto more;
 	}
 	return ret;
 }
 private @trusted string[] sortStrings(string[] obj) {
 	static extern(C) int comparator(scope const void* ra, scope const void* rb) {
 		string a = *cast(string*) ra;
 		string b = *cast(string*) rb;
 		return a < b;
 	}
 	import core.stdc.stdlib;
 	qsort(obj.ptr, obj.length, typeof(obj[0]).sizeof, &comparator);
 	return obj;
 }
 private struct LineSplitter {
 	string s;
 	size_t nextLineBreak;
 	this(string s) {
 		this.s = s;
 		popFront();
 	}
 	string front() {
 		return s[0 .. nextLineBreak];
 	}
 	void popFront() {
 		s = s[nextLineBreak .. $];
 		nextLineBreak = 0;
 		while(nextLineBreak < s.length) {
 			if(s[nextLineBreak] == '\n') {
 				nextLineBreak++;
 				return;
 			}
 			nextLineBreak++;
 		}
 	}
 	bool empty() {
 		return s.length == 0;
 	}
 }
 unittest {
 	foreach(line; LineSplitter("foo"))
 		assert(line == "foo");
 	int c;
 	foreach(line; LineSplitter("foo\nbar")) {
 		if(c == 0)
 			assert(line == "foo\n");
 		else if(c == 1)
 			assert(line == "bar");
 		c++;
 	}
 }
 private struct ElementStreamFilter {
 	ElementStream range;
 	bool delegate(Element e) filter;
 	this(ElementStream range, bool delegate(Element e) filter) {
 		this.range = range;
 		this.filter = filter;
 		if(!range.empty && !filter(range.front))
 			popFront();
 	}
 	void popFront() {
 		range.popFront;
 		while(!range.empty && !this.filter(range.front)) {
 			range.popFront();
 		}
 	}
 	bool empty() {
 		return range.empty;
 	}
 	Element front() {
 		return range.front;
 	}
 }
 alias arsd.core.indexOf indexOf;
 alias arsd.core.stripInternal strip;
 alias arsd.core.stripRightInternal stripRight;
 alias arsd.core.startsWith startsWith;
 alias arsd.core.endsWith endsWith;
 // FIXME: start index can be useful but i used 0 here anyway
 private size_t indexOf(string haystack, string needle, bool caseSensitive) {
 	if(!caseSensitive) {
 		haystack = toLower(haystack);
 		needle = toLower(needle);
 	}
 	return indexOf(haystack, needle);
 }
 private string to(T : string, F)(F f) {
 	return arsd.core.toStringInternal(f);
 }
 private int to(T : int, F)(F f) {
 	// NOT GENERIC DO NOT USE OUTSIDE OF THIS MODULE'S CONTEXT
 	int accumulator;
 	foreach(ch; f) {
 		accumulator *= 10;
 		accumulator += ch - '0';
 	}
 	return accumulator;
 }
 private char[] to(T : char[], F : dchar[])(F f) {
 	char[] s;
 	foreach(dc; f) {
 		char[4] buffer;
 		s ~= buffer[0 .. arsd.core.encodeUtf8(buffer, dc)];
 	}
 	return s;
 }
 private string to(T : string, F : const(dchar)[])(F f) {
 	return cast(string) to!(char[], dchar[])(cast(dchar[]) f);
 }
 private string toLower(string s) {
 	foreach(ch; s) {
 		if(ch >= 'A' && ch <= 'Z')
 			goto needed;
 	}
 	return s; // shortcut, no changes
 	needed:
 	char[] ret;
 	ret.length = s.length;
 	foreach(idx, ch; s) {
 		if(ch >= 'A' && ch <= 'Z')
 			ret[idx] = ch | 32;
 		else
 			ret[idx] = ch;
 	}
 	return cast(string) ret;
 }
 unittest {
 	assert("".toLower == "");
 	assert("foo".toLower == "foo");
 	assert("FaZ".toLower == "faz");
 	assert("423".toLower == "423");
 }
 private string toUpper(string s) {
 	foreach(ch; s) {
 		if(ch >= 'a' && ch <= 'z')
 			goto needed;
 	}
 	return s; // shortcut, no changes
 	needed:
 	char[] ret;
 	ret.length = s.length;
 	foreach(idx, ch; s) {
 		if(ch >= 'a' && ch <= 'z')
 			ret[idx] = ch & ~32;
 		else
 			ret[idx] = ch;
 	}
 	return cast(string) ret;
 }
 unittest {
 	assert("".toUpper == "");
 	assert("foo".toUpper == "FOO");
 	assert("FaZ".toUpper == "FAZ");
 	assert("423".toUpper == "423");
 }
 /*
 Copyright: Adam D. Ruppe, 2010 - 2023