| // Written in the D programming language. |
| |
| /** |
| $(RED Warning: This module is considered out-dated and not up to Phobos' |
| current standards. It will remain until we have a suitable replacement, |
| but be aware that it will not remain long term.) |
| |
| Classes and functions for creating and parsing XML |
| |
| The basic architecture of this module is that there are standalone functions, |
| classes for constructing an XML document from scratch (Tag, Element and |
| Document), and also classes for parsing a pre-existing XML file (ElementParser |
| and DocumentParser). The parsing classes <i>may</i> be used to build a |
| Document, but that is not their primary purpose. The handling capabilities of |
| DocumentParser and ElementParser are sufficiently customizable that you can |
| make them do pretty much whatever you want. |
| |
| Example: This example creates a DOM (Document Object Model) tree |
| from an XML file. |
| ------------------------------------------------------------------------------ |
| import std.xml; |
| import std.stdio; |
| import std.string; |
| import std.file; |
| |
| // books.xml is used in various samples throughout the Microsoft XML Core |
| // Services (MSXML) SDK. |
| // |
| // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx |
| |
| void main() |
| { |
| string s = cast(string) std.file.read("books.xml"); |
| |
| // Check for well-formedness |
| check(s); |
| |
| // Make a DOM tree |
| auto doc = new Document(s); |
| |
| // Plain-print it |
| writeln(doc); |
| } |
| ------------------------------------------------------------------------------ |
| |
| Example: This example does much the same thing, except that the file is |
| deconstructed and reconstructed by hand. This is more work, but the |
| techniques involved offer vastly more power. |
| ------------------------------------------------------------------------------ |
| import std.xml; |
| import std.stdio; |
| import std.string; |
| |
| struct Book |
| { |
| string id; |
| string author; |
| string title; |
| string genre; |
| string price; |
| string pubDate; |
| string description; |
| } |
| |
| void main() |
| { |
| string s = cast(string) std.file.read("books.xml"); |
| |
| // Check for well-formedness |
| check(s); |
| |
| // Take it apart |
| Book[] books; |
| |
| auto xml = new DocumentParser(s); |
| xml.onStartTag["book"] = (ElementParser xml) |
| { |
| Book book; |
| book.id = xml.tag.attr["id"]; |
| |
| xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; |
| xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; |
| xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; |
| xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; |
| xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; |
| xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; |
| |
| xml.parse(); |
| |
| books ~= book; |
| }; |
| xml.parse(); |
| |
| // Put it back together again; |
| auto doc = new Document(new Tag("catalog")); |
| foreach (book;books) |
| { |
| auto element = new Element("book"); |
| element.tag.attr["id"] = book.id; |
| |
| element ~= new Element("author", book.author); |
| element ~= new Element("title", book.title); |
| element ~= new Element("genre", book.genre); |
| element ~= new Element("price", book.price); |
| element ~= new Element("publish-date",book.pubDate); |
| element ~= new Element("description", book.description); |
| |
| doc ~= element; |
| } |
| |
| // Pretty-print it |
| writefln(join(doc.pretty(3),"\n")); |
| } |
| ------------------------------------------------------------------------------- |
| Copyright: Copyright Janice Caron 2008 - 2009. |
| License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). |
| Authors: Janice Caron |
| Source: $(PHOBOSSRC std/_xml.d) |
| */ |
| /* |
| Copyright Janice Caron 2008 - 2009. |
| Distributed under the Boost Software License, Version 1.0. |
| (See accompanying file LICENSE_1_0.txt or copy at |
| http://www.boost.org/LICENSE_1_0.txt) |
| */ |
| module std.xml; |
| |
| enum cdata = "<![CDATA["; |
| |
| /** |
| * Returns true if the character is a character according to the XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isChar(dchar c) @safe @nogc pure nothrow // rule 2 |
| { |
| if (c <= 0xD7FF) |
| { |
| if (c >= 0x20) |
| return true; |
| switch (c) |
| { |
| case 0xA: |
| case 0x9: |
| case 0xD: |
| return true; |
| default: |
| return false; |
| } |
| } |
| else if (0xE000 <= c && c <= 0x10FFFF) |
| { |
| if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF |
| return true; |
| } |
| return false; |
| } |
| |
| @safe @nogc nothrow pure unittest |
| { |
| assert(!isChar(cast(dchar) 0x8)); |
| assert( isChar(cast(dchar) 0x9)); |
| assert( isChar(cast(dchar) 0xA)); |
| assert(!isChar(cast(dchar) 0xB)); |
| assert(!isChar(cast(dchar) 0xC)); |
| assert( isChar(cast(dchar) 0xD)); |
| assert(!isChar(cast(dchar) 0xE)); |
| assert(!isChar(cast(dchar) 0x1F)); |
| assert( isChar(cast(dchar) 0x20)); |
| assert( isChar('J')); |
| assert( isChar(cast(dchar) 0xD7FF)); |
| assert(!isChar(cast(dchar) 0xD800)); |
| assert(!isChar(cast(dchar) 0xDFFF)); |
| assert( isChar(cast(dchar) 0xE000)); |
| assert( isChar(cast(dchar) 0xFFFD)); |
| assert(!isChar(cast(dchar) 0xFFFE)); |
| assert(!isChar(cast(dchar) 0xFFFF)); |
| assert( isChar(cast(dchar) 0x10000)); |
| assert( isChar(cast(dchar) 0x10FFFF)); |
| assert(!isChar(cast(dchar) 0x110000)); |
| |
| debug (stdxml_TestHardcodedChecks) |
| { |
| foreach (c; 0 .. dchar.max + 1) |
| assert(isChar(c) == lookup(CharTable, c)); |
| } |
| } |
| |
| /** |
| * Returns true if the character is whitespace according to the XML standard |
| * |
| * Only the following characters are considered whitespace in XML - space, tab, |
| * carriage return and linefeed |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isSpace(dchar c) @safe @nogc pure nothrow |
| { |
| return c == '\u0020' || c == '\u0009' || c == '\u000A' || c == '\u000D'; |
| } |
| |
| /** |
| * Returns true if the character is a digit according to the XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isDigit(dchar c) @safe @nogc pure nothrow |
| { |
| if (c <= 0x0039 && c >= 0x0030) |
| return true; |
| else |
| return lookup(DigitTable,c); |
| } |
| |
| @safe @nogc nothrow pure unittest |
| { |
| debug (stdxml_TestHardcodedChecks) |
| { |
| foreach (c; 0 .. dchar.max + 1) |
| assert(isDigit(c) == lookup(DigitTable, c)); |
| } |
| } |
| |
| /** |
| * Returns true if the character is a letter according to the XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84 |
| { |
| return isIdeographic(c) || isBaseChar(c); |
| } |
| |
| /** |
| * Returns true if the character is an ideographic character according to the |
| * XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isIdeographic(dchar c) @safe @nogc nothrow pure |
| { |
| if (c == 0x3007) |
| return true; |
| if (c <= 0x3029 && c >= 0x3021 ) |
| return true; |
| if (c <= 0x9FA5 && c >= 0x4E00) |
| return true; |
| return false; |
| } |
| |
| @safe @nogc nothrow pure unittest |
| { |
| assert(isIdeographic('\u4E00')); |
| assert(isIdeographic('\u9FA5')); |
| assert(isIdeographic('\u3007')); |
| assert(isIdeographic('\u3021')); |
| assert(isIdeographic('\u3029')); |
| |
| debug (stdxml_TestHardcodedChecks) |
| { |
| foreach (c; 0 .. dchar.max + 1) |
| assert(isIdeographic(c) == lookup(IdeographicTable, c)); |
| } |
| } |
| |
| /** |
| * Returns true if the character is a base character according to the XML |
| * standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isBaseChar(dchar c) @safe @nogc nothrow pure |
| { |
| return lookup(BaseCharTable,c); |
| } |
| |
| /** |
| * Returns true if the character is a combining character according to the |
| * XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isCombiningChar(dchar c) @safe @nogc nothrow pure |
| { |
| return lookup(CombiningCharTable,c); |
| } |
| |
| /** |
| * Returns true if the character is an extender according to the XML standard |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * c = the character to be tested |
| */ |
| bool isExtender(dchar c) @safe @nogc nothrow pure |
| { |
| return lookup(ExtenderTable,c); |
| } |
| |
| /** |
| * Encodes a string by replacing all characters which need to be escaped with |
| * appropriate predefined XML entities. |
| * |
| * encode() escapes certain characters (ampersand, quote, apostrophe, less-than |
| * and greater-than), and similarly, decode() unescapes them. These functions |
| * are provided for convenience only. You do not need to use them when using |
| * the std.xml classes, because then all the encoding and decoding will be done |
| * for you automatically. |
| * |
| * If the string is not modified, the original will be returned. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * s = The string to be encoded |
| * |
| * Returns: The encoded string |
| * |
| * Example: |
| * -------------- |
| * writefln(encode("a > b")); // writes "a > b" |
| * -------------- |
| */ |
| S encode(S)(S s) |
| { |
| import std.array : appender; |
| |
| string r; |
| size_t lastI; |
| auto result = appender!S(); |
| |
| foreach (i, c; s) |
| { |
| switch (c) |
| { |
| case '&': r = "&"; break; |
| case '"': r = """; break; |
| case '\'': r = "'"; break; |
| case '<': r = "<"; break; |
| case '>': r = ">"; break; |
| default: continue; |
| } |
| // Replace with r |
| result.put(s[lastI .. i]); |
| result.put(r); |
| lastI = i + 1; |
| } |
| |
| if (!result.data.ptr) return s; |
| result.put(s[lastI .. $]); |
| return result.data; |
| } |
| |
| @safe pure unittest |
| { |
| auto s = "hello"; |
| assert(encode(s) is s); |
| assert(encode("a > b") == "a > b", encode("a > b")); |
| assert(encode("a < b") == "a < b"); |
| assert(encode("don't") == "don't"); |
| assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); |
| assert(encode("cat & dog") == "cat & dog"); |
| } |
| |
| /** |
| * Mode to use for decoding. |
| * |
| * $(DDOC_ENUM_MEMBERS NONE) Do not decode |
| * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors |
| * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error |
| */ |
| enum DecodeMode |
| { |
| NONE, LOOSE, STRICT |
| } |
| |
| /** |
| * Decodes a string by unescaping all predefined XML entities. |
| * |
| * encode() escapes certain characters (ampersand, quote, apostrophe, less-than |
| * and greater-than), and similarly, decode() unescapes them. These functions |
| * are provided for convenience only. You do not need to use them when using |
| * the std.xml classes, because then all the encoding and decoding will be done |
| * for you automatically. |
| * |
| * This function decodes the entities &amp;, &quot;, &apos;, |
| * &lt; and &gt, |
| * as well as decimal and hexadecimal entities such as &#x20AC; |
| * |
| * If the string does not contain an ampersand, the original will be returned. |
| * |
| * Note that the "mode" parameter can be one of DecodeMode.NONE (do not |
| * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT |
| * (decode, and throw a DecodeException in the event of an error). |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Params: |
| * s = The string to be decoded |
| * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). |
| * |
| * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails |
| * |
| * Returns: The decoded string |
| * |
| * Example: |
| * -------------- |
| * writefln(decode("a > b")); // writes "a > b" |
| * -------------- |
| */ |
| string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure |
| { |
| import std.algorithm.searching : startsWith; |
| |
| if (mode == DecodeMode.NONE) return s; |
| |
| string buffer; |
| foreach (ref i; 0 .. s.length) |
| { |
| char c = s[i]; |
| if (c != '&') |
| { |
| if (buffer.length != 0) buffer ~= c; |
| } |
| else |
| { |
| if (buffer.length == 0) |
| { |
| buffer = s[0 .. i].dup; |
| } |
| if (startsWith(s[i..$],"&#")) |
| { |
| try |
| { |
| dchar d; |
| string t = s[i..$]; |
| checkCharRef(t, d); |
| char[4] temp; |
| import std.utf : encode; |
| buffer ~= temp[0 .. encode(temp, d)]; |
| i = s.length - t.length - 1; |
| } |
| catch (Err e) |
| { |
| if (mode == DecodeMode.STRICT) |
| throw new DecodeException("Unescaped &"); |
| buffer ~= '&'; |
| } |
| } |
| else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } |
| else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } |
| else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } |
| else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } |
| else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } |
| else |
| { |
| if (mode == DecodeMode.STRICT) |
| throw new DecodeException("Unescaped &"); |
| buffer ~= '&'; |
| } |
| } |
| } |
| return (buffer.length == 0) ? s : buffer; |
| } |
| |
| @safe pure unittest |
| { |
| void assertNot(string s) pure |
| { |
| bool b = false; |
| try { decode(s,DecodeMode.STRICT); } |
| catch (DecodeException e) { b = true; } |
| assert(b,s); |
| } |
| |
| // Assert that things that should work, do |
| auto s = "hello"; |
| assert(decode(s, DecodeMode.STRICT) is s); |
| assert(decode("a > b", DecodeMode.STRICT) == "a > b"); |
| assert(decode("a < b", DecodeMode.STRICT) == "a < b"); |
| assert(decode("don't", DecodeMode.STRICT) == "don't"); |
| assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); |
| assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); |
| assert(decode("*", DecodeMode.STRICT) == "*"); |
| assert(decode("*", DecodeMode.STRICT) == "*"); |
| assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); |
| assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); |
| assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); |
| assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); |
| assert(decode("G;", DecodeMode.LOOSE) == "G;"); |
| assert(decode("G;", DecodeMode.LOOSE) == "G;"); |
| |
| // Assert that things that shouldn't work, don't |
| assertNot("cat & dog"); |
| assertNot("a > b"); |
| assertNot("&#;"); |
| assertNot("&#x;"); |
| assertNot("G;"); |
| assertNot("G;"); |
| } |
| |
| /** |
| * Class representing an XML document. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| */ |
| class Document : Element |
| { |
| /** |
| * Contains all text which occurs before the root element. |
| * Defaults to <?xml version="1.0"?> |
| */ |
| string prolog = "<?xml version=\"1.0\"?>"; |
| /** |
| * Contains all text which occurs after the root element. |
| * Defaults to the empty string |
| */ |
| string epilog; |
| |
| /** |
| * Constructs a Document by parsing XML text. |
| * |
| * This function creates a complete DOM (Document Object Model) tree. |
| * |
| * The input to this function MUST be valid XML. |
| * This is enforced by DocumentParser's in contract. |
| * |
| * Params: |
| * s = the complete XML text. |
| */ |
| this(string s) |
| in |
| { |
| assert(s.length != 0); |
| } |
| body |
| { |
| auto xml = new DocumentParser(s); |
| string tagString = xml.tag.tagString; |
| |
| this(xml.tag); |
| prolog = s[0 .. tagString.ptr - s.ptr]; |
| parse(xml); |
| epilog = *xml.s; |
| } |
| |
| /** |
| * Constructs a Document from a Tag. |
| * |
| * Params: |
| * tag = the start tag of the document. |
| */ |
| this(const(Tag) tag) |
| { |
| super(tag); |
| } |
| |
| const |
| { |
| /** |
| * Compares two Documents for equality |
| * |
| * Example: |
| * -------------- |
| * Document d1,d2; |
| * if (d1 == d2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const doc = toType!(const Document)(o); |
| return prolog == doc.prolog |
| && (cast(const) this).Element.opEquals(cast(const) doc) |
| && epilog == doc.epilog; |
| } |
| |
| /** |
| * Compares two Documents |
| * |
| * You should rarely need to call this function. It exists so that |
| * Documents can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * Document d1,d2; |
| * if (d1 < d2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const doc = toType!(const Document)(o); |
| if (prolog != doc.prolog) |
| return prolog < doc.prolog ? -1 : 1; |
| if (int cmp = this.Element.opCmp(doc)) |
| return cmp; |
| if (epilog != doc.epilog) |
| return epilog < doc.epilog ? -1 : 1; |
| return 0; |
| } |
| |
| /** |
| * Returns the hash of a Document |
| * |
| * You should rarely need to call this function. It exists so that |
| * Documents can be used as associative array keys. |
| */ |
| override size_t toHash() scope const @trusted |
| { |
| return hash(prolog, hash(epilog, (cast() this).Element.toHash())); |
| } |
| |
| /** |
| * Returns the string representation of a Document. (That is, the |
| * complete XML of a document). |
| */ |
| override string toString() scope const @safe |
| { |
| return prolog ~ super.toString() ~ epilog; |
| } |
| } |
| } |
| |
| @system unittest |
| { |
| // https://issues.dlang.org/show_bug.cgi?id=14966 |
| auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`; |
| |
| auto a = new Document(xml); |
| auto b = new Document(xml); |
| assert(a == b); |
| assert(!(a < b)); |
| int[Document] aa; |
| aa[a] = 1; |
| assert(aa[b] == 1); |
| |
| b ~= new Element("b"); |
| assert(a < b); |
| assert(b > a); |
| } |
| |
| /** |
| * Class representing an XML element. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| */ |
| class Element : Item |
| { |
| Tag tag; /// The start tag of the element |
| Item[] items; /// The element's items |
| Text[] texts; /// The element's text items |
| CData[] cdatas; /// The element's CData items |
| Comment[] comments; /// The element's comments |
| ProcessingInstruction[] pis; /// The element's processing instructions |
| Element[] elements; /// The element's child elements |
| |
| /** |
| * Constructs an Element given a name and a string to be used as a Text |
| * interior. |
| * |
| * Params: |
| * name = the name of the element. |
| * interior = (optional) the string interior. |
| * |
| * Example: |
| * ------------------------------------------------------- |
| * auto element = new Element("title","Serenity") |
| * // constructs the element <title>Serenity</title> |
| * ------------------------------------------------------- |
| */ |
| this(string name, string interior=null) @safe pure |
| { |
| this(new Tag(name)); |
| if (interior.length != 0) opCatAssign(new Text(interior)); |
| } |
| |
| /** |
| * Constructs an Element from a Tag. |
| * |
| * Params: |
| * tag_ = the start or empty tag of the element. |
| */ |
| this(const(Tag) tag_) @safe pure |
| { |
| this.tag = new Tag(tag_.name); |
| tag.type = TagType.EMPTY; |
| foreach (k,v;tag_.attr) tag.attr[k] = v; |
| tag.tagString = tag_.tagString; |
| } |
| |
| /** |
| * Append a text item to the interior of this element |
| * |
| * Params: |
| * item = the item you wish to append. |
| * |
| * Example: |
| * -------------- |
| * Element element; |
| * element ~= new Text("hello"); |
| * -------------- |
| */ |
| void opCatAssign(Text item) @safe pure |
| { |
| texts ~= item; |
| appendItem(item); |
| } |
| |
| /** |
| * Append a CData item to the interior of this element |
| * |
| * Params: |
| * item = the item you wish to append. |
| * |
| * Example: |
| * -------------- |
| * Element element; |
| * element ~= new CData("hello"); |
| * -------------- |
| */ |
| void opCatAssign(CData item) @safe pure |
| { |
| cdatas ~= item; |
| appendItem(item); |
| } |
| |
| /** |
| * Append a comment to the interior of this element |
| * |
| * Params: |
| * item = the item you wish to append. |
| * |
| * Example: |
| * -------------- |
| * Element element; |
| * element ~= new Comment("hello"); |
| * -------------- |
| */ |
| void opCatAssign(Comment item) @safe pure |
| { |
| comments ~= item; |
| appendItem(item); |
| } |
| |
| /** |
| * Append a processing instruction to the interior of this element |
| * |
| * Params: |
| * item = the item you wish to append. |
| * |
| * Example: |
| * -------------- |
| * Element element; |
| * element ~= new ProcessingInstruction("hello"); |
| * -------------- |
| */ |
| void opCatAssign(ProcessingInstruction item) @safe pure |
| { |
| pis ~= item; |
| appendItem(item); |
| } |
| |
| /** |
| * Append a complete element to the interior of this element |
| * |
| * Params: |
| * item = the item you wish to append. |
| * |
| * Example: |
| * -------------- |
| * Element element; |
| * Element other = new Element("br"); |
| * element ~= other; |
| * // appends element representing <br /> |
| * -------------- |
| */ |
| void opCatAssign(Element item) @safe pure |
| { |
| elements ~= item; |
| appendItem(item); |
| } |
| |
| private void appendItem(Item item) @safe pure |
| { |
| items ~= item; |
| if (tag.type == TagType.EMPTY && !item.isEmptyXML) |
| tag.type = TagType.START; |
| } |
| |
| private void parse(ElementParser xml) |
| { |
| xml.onText = (string s) { opCatAssign(new Text(s)); }; |
| xml.onCData = (string s) { opCatAssign(new CData(s)); }; |
| xml.onComment = (string s) { opCatAssign(new Comment(s)); }; |
| xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); }; |
| |
| xml.onStartTag[null] = (ElementParser xml) |
| { |
| auto e = new Element(xml.tag); |
| e.parse(xml); |
| opCatAssign(e); |
| }; |
| |
| xml.parse(); |
| } |
| |
| /** |
| * Compares two Elements for equality |
| * |
| * Example: |
| * -------------- |
| * Element e1,e2; |
| * if (e1 == e2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const element = toType!(const Element)(o); |
| immutable len = items.length; |
| if (len != element.items.length) return false; |
| foreach (i; 0 .. len) |
| { |
| if (!items[i].opEquals(element.items[i])) return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Compares two Elements |
| * |
| * You should rarely need to call this function. It exists so that Elements |
| * can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * Element e1,e2; |
| * if (e1 < e2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) @safe const |
| { |
| const element = toType!(const Element)(o); |
| for (uint i=0; ; ++i) |
| { |
| if (i == items.length && i == element.items.length) return 0; |
| if (i == items.length) return -1; |
| if (i == element.items.length) return 1; |
| if (!items[i].opEquals(element.items[i])) |
| return items[i].opCmp(element.items[i]); |
| } |
| } |
| |
| /** |
| * Returns the hash of an Element |
| * |
| * You should rarely need to call this function. It exists so that Elements |
| * can be used as associative array keys. |
| */ |
| override size_t toHash() scope const @safe |
| { |
| size_t hash = tag.toHash(); |
| foreach (item;items) hash += item.toHash(); |
| return hash; |
| } |
| |
| const |
| { |
| /** |
| * Returns the decoded interior of an element. |
| * |
| * The element is assumed to contain text <i>only</i>. So, for |
| * example, given XML such as "<title>Good &amp; |
| * Bad</title>", will return "Good & Bad". |
| * |
| * Params: |
| * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). |
| * |
| * Throws: DecodeException if decode fails |
| */ |
| string text(DecodeMode mode=DecodeMode.LOOSE) |
| { |
| string buffer; |
| foreach (item;items) |
| { |
| Text t = cast(Text) item; |
| if (t is null) throw new DecodeException(item.toString()); |
| buffer ~= decode(t.toString(),mode); |
| } |
| return buffer; |
| } |
| |
| /** |
| * Returns an indented string representation of this item |
| * |
| * Params: |
| * indent = (optional) number of spaces by which to indent this |
| * element. Defaults to 2. |
| */ |
| override string[] pretty(uint indent=2) scope |
| { |
| import std.algorithm.searching : count; |
| import std.string : rightJustify; |
| |
| if (isEmptyXML) return [ tag.toEmptyString() ]; |
| |
| if (items.length == 1) |
| { |
| auto t = cast(const(Text))(items[0]); |
| if (t !is null) |
| { |
| return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; |
| } |
| } |
| |
| string[] a = [ tag.toStartString() ]; |
| foreach (item;items) |
| { |
| string[] b = item.pretty(indent); |
| foreach (s;b) |
| { |
| a ~= rightJustify(s,count(s) + indent); |
| } |
| } |
| a ~= tag.toEndString(); |
| return a; |
| } |
| |
| /** |
| * Returns the string representation of an Element |
| * |
| * Example: |
| * -------------- |
| * auto element = new Element("br"); |
| * writefln(element.toString()); // writes "<br />" |
| * -------------- |
| */ |
| override string toString() scope @safe |
| { |
| if (isEmptyXML) return tag.toEmptyString(); |
| |
| string buffer = tag.toStartString(); |
| foreach (item;items) { buffer ~= item.toString(); } |
| buffer ~= tag.toEndString(); |
| return buffer; |
| } |
| |
| override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } |
| } |
| } |
| |
| /** |
| * Tag types. |
| * |
| * $(DDOC_ENUM_MEMBERS START) Used for start tags |
| * $(DDOC_ENUM_MEMBERS END) Used for end tags |
| * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags |
| * |
| */ |
| enum TagType { START, END, EMPTY } |
| |
| /** |
| * Class representing an XML tag. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * The class invariant guarantees |
| * <ul> |
| * <li> that $(B type) is a valid enum TagType value</li> |
| * <li> that $(B name) consists of valid characters</li> |
| * <li> that each attribute name consists of valid characters</li> |
| * </ul> |
| */ |
| class Tag |
| { |
| TagType type = TagType.START; /// Type of tag |
| string name; /// Tag name |
| string[string] attr; /// Associative array of attributes |
| private string tagString; |
| |
| invariant() |
| { |
| string s; |
| string t; |
| |
| assert(type == TagType.START |
| || type == TagType.END |
| || type == TagType.EMPTY); |
| |
| s = name; |
| try { checkName(s,t); } |
| catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } |
| |
| foreach (k,v;attr) |
| { |
| s = k; |
| try { checkName(s,t); } |
| catch (Err e) |
| { assert(false,"Invalid atrribute name:" ~ e.toString()); } |
| } |
| } |
| |
| /** |
| * Constructs an instance of Tag with a specified name and type |
| * |
| * The constructor does not initialize the attributes. To initialize the |
| * attributes, you access the $(B attr) member variable. |
| * |
| * Params: |
| * name = the Tag's name |
| * type = (optional) the Tag's type. If omitted, defaults to |
| * TagType.START. |
| * |
| * Example: |
| * -------------- |
| * auto tag = new Tag("img",Tag.EMPTY); |
| * tag.attr["src"] = "http://example.com/example.jpg"; |
| * -------------- |
| */ |
| this(string name, TagType type=TagType.START) @safe pure |
| { |
| this.name = name; |
| this.type = type; |
| } |
| |
| /* Private constructor (so don't ddoc this!) |
| * |
| * Constructs a Tag by parsing the string representation, e.g. "<html>". |
| * |
| * The string is passed by reference, and is advanced over all characters |
| * consumed. |
| * |
| * The second parameter is a dummy parameter only, required solely to |
| * distinguish this constructor from the public one. |
| */ |
| private this(ref string s, bool dummy) @safe pure |
| { |
| import std.algorithm.searching : countUntil; |
| import std.ascii : isWhite; |
| import std.utf : byCodeUnit; |
| |
| tagString = s; |
| try |
| { |
| reqc(s,'<'); |
| if (optc(s,'/')) type = TagType.END; |
| ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); |
| name = s[0 .. i]; |
| s = s[i .. $]; |
| |
| i = s.byCodeUnit.countUntil!(a => !isWhite(a)); |
| s = s[i .. $]; |
| |
| while (s.length > 0 && s[0] != '>' && s[0] != '/') |
| { |
| i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); |
| string key = s[0 .. i]; |
| s = s[i .. $]; |
| |
| i = s.byCodeUnit.countUntil!(a => !isWhite(a)); |
| s = s[i .. $]; |
| reqc(s,'='); |
| i = s.byCodeUnit.countUntil!(a => !isWhite(a)); |
| s = s[i .. $]; |
| |
| immutable char quote = requireOneOf(s,"'\""); |
| i = s.byCodeUnit.countUntil(quote); |
| string val = decode(s[0 .. i], DecodeMode.LOOSE); |
| s = s[i .. $]; |
| reqc(s,quote); |
| |
| i = s.byCodeUnit.countUntil!(a => !isWhite(a)); |
| s = s[i .. $]; |
| attr[key] = val; |
| } |
| if (optc(s,'/')) |
| { |
| if (type == TagType.END) throw new TagException(""); |
| type = TagType.EMPTY; |
| } |
| reqc(s,'>'); |
| tagString.length = tagString.length - s.length; |
| } |
| catch (XMLException e) |
| { |
| tagString.length = tagString.length - s.length; |
| throw new TagException(tagString); |
| } |
| } |
| |
| const |
| { |
| /** |
| * Compares two Tags for equality |
| * |
| * You should rarely need to call this function. It exists so that Tags |
| * can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * Tag tag1,tag2 |
| * if (tag1 == tag2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope Object o) |
| { |
| const tag = toType!(const Tag)(o); |
| return |
| (name != tag.name) ? false : ( |
| (attr != tag.attr) ? false : ( |
| (type != tag.type) ? false : ( |
| true ))); |
| } |
| |
| /** |
| * Compares two Tags |
| * |
| * Example: |
| * -------------- |
| * Tag tag1,tag2 |
| * if (tag1 < tag2) { } |
| * -------------- |
| */ |
| override int opCmp(Object o) |
| { |
| const tag = toType!(const Tag)(o); |
| // Note that attr is an AA, so the comparison is nonsensical (bug 10381) |
| return |
| ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : |
| ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : |
| ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : |
| 0 ))); |
| } |
| |
| /** |
| * Returns the hash of a Tag |
| * |
| * You should rarely need to call this function. It exists so that Tags |
| * can be used as associative array keys. |
| */ |
| override size_t toHash() |
| { |
| return typeid(name).getHash(&name); |
| } |
| |
| /** |
| * Returns the string representation of a Tag |
| * |
| * Example: |
| * -------------- |
| * auto tag = new Tag("book",TagType.START); |
| * writefln(tag.toString()); // writes "<book>" |
| * -------------- |
| */ |
| override string toString() @safe |
| { |
| if (isEmpty) return toEmptyString(); |
| return (isEnd) ? toEndString() : toStartString(); |
| } |
| |
| private |
| { |
| string toNonEndString() @safe |
| { |
| import std.format : format; |
| |
| string s = "<" ~ name; |
| foreach (key,val;attr) |
| s ~= format(" %s=\"%s\"",key,encode(val)); |
| return s; |
| } |
| |
| string toStartString() @safe { return toNonEndString() ~ ">"; } |
| |
| string toEndString() @safe { return "</" ~ name ~ ">"; } |
| |
| string toEmptyString() @safe { return toNonEndString() ~ " />"; } |
| } |
| |
| /** |
| * Returns true if the Tag is a start tag |
| * |
| * Example: |
| * -------------- |
| * if (tag.isStart) { } |
| * -------------- |
| */ |
| @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; } |
| |
| /** |
| * Returns true if the Tag is an end tag |
| * |
| * Example: |
| * -------------- |
| * if (tag.isEnd) { } |
| * -------------- |
| */ |
| @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; } |
| |
| /** |
| * Returns true if the Tag is an empty tag |
| * |
| * Example: |
| * -------------- |
| * if (tag.isEmpty) { } |
| * -------------- |
| */ |
| @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; } |
| } |
| } |
| |
| /** |
| * Class representing a comment |
| */ |
| class Comment : Item |
| { |
| private string content; |
| |
| /** |
| * Construct a comment |
| * |
| * Params: |
| * content = the body of the comment |
| * |
| * Throws: CommentException if the comment body is illegal (contains "--" |
| * or exactly equals "-") |
| * |
| * Example: |
| * -------------- |
| * auto item = new Comment("This is a comment"); |
| * // constructs <!--This is a comment--> |
| * -------------- |
| */ |
| this(string content) @safe pure |
| { |
| import std.string : indexOf; |
| |
| if (content == "-" || content.indexOf("--") != -1) |
| throw new CommentException(content); |
| this.content = content; |
| } |
| |
| /** |
| * Compares two comments for equality |
| * |
| * Example: |
| * -------------- |
| * Comment item1,item2; |
| * if (item1 == item2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const Comment) item; |
| return t !is null && content == t.content; |
| } |
| |
| /** |
| * Compares two comments |
| * |
| * You should rarely need to call this function. It exists so that Comments |
| * can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * Comment item1,item2; |
| * if (item1 < item2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const Comment) item; |
| return t !is null && (content != t.content |
| ? (content < t.content ? -1 : 1 ) : 0 ); |
| } |
| |
| /** |
| * Returns the hash of a Comment |
| * |
| * You should rarely need to call this function. It exists so that Comments |
| * can be used as associative array keys. |
| */ |
| override size_t toHash() scope const nothrow { return hash(content); } |
| |
| /** |
| * Returns a string representation of this comment |
| */ |
| override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; } |
| |
| override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always |
| } |
| |
| @safe unittest // issue 16241 |
| { |
| import std.exception : assertThrown; |
| auto c = new Comment("=="); |
| assert(c.content == "=="); |
| assertThrown!CommentException(new Comment("--")); |
| } |
| |
| /** |
| * Class representing a Character Data section |
| */ |
| class CData : Item |
| { |
| private string content; |
| |
| /** |
| * Construct a character data section |
| * |
| * Params: |
| * content = the body of the character data segment |
| * |
| * Throws: CDataException if the segment body is illegal (contains "]]>") |
| * |
| * Example: |
| * -------------- |
| * auto item = new CData("<b>hello</b>"); |
| * // constructs <![CDATA[<b>hello</b>]]> |
| * -------------- |
| */ |
| this(string content) @safe pure |
| { |
| import std.string : indexOf; |
| if (content.indexOf("]]>") != -1) throw new CDataException(content); |
| this.content = content; |
| } |
| |
| /** |
| * Compares two CDatas for equality |
| * |
| * Example: |
| * -------------- |
| * CData item1,item2; |
| * if (item1 == item2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const CData) item; |
| return t !is null && content == t.content; |
| } |
| |
| /** |
| * Compares two CDatas |
| * |
| * You should rarely need to call this function. It exists so that CDatas |
| * can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * CData item1,item2; |
| * if (item1 < item2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const CData) item; |
| return t !is null && (content != t.content |
| ? (content < t.content ? -1 : 1 ) : 0 ); |
| } |
| |
| /** |
| * Returns the hash of a CData |
| * |
| * You should rarely need to call this function. It exists so that CDatas |
| * can be used as associative array keys. |
| */ |
| override size_t toHash() scope const nothrow { return hash(content); } |
| |
| /** |
| * Returns a string representation of this CData section |
| */ |
| override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; } |
| |
| override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always |
| } |
| |
| /** |
| * Class representing a text (aka Parsed Character Data) section |
| */ |
| class Text : Item |
| { |
| private string content; |
| |
| /** |
| * Construct a text (aka PCData) section |
| * |
| * Params: |
| * content = the text. This function encodes the text before |
| * insertion, so it is safe to insert any text |
| * |
| * Example: |
| * -------------- |
| * auto Text = new CData("a < b"); |
| * // constructs a < b |
| * -------------- |
| */ |
| this(string content) @safe pure |
| { |
| this.content = encode(content); |
| } |
| |
| /** |
| * Compares two text sections for equality |
| * |
| * Example: |
| * -------------- |
| * Text item1,item2; |
| * if (item1 == item2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const Text) item; |
| return t !is null && content == t.content; |
| } |
| |
| /** |
| * Compares two text sections |
| * |
| * You should rarely need to call this function. It exists so that Texts |
| * can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * Text item1,item2; |
| * if (item1 < item2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const Text) item; |
| return t !is null |
| && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); |
| } |
| |
| /** |
| * Returns the hash of a text section |
| * |
| * You should rarely need to call this function. It exists so that Texts |
| * can be used as associative array keys. |
| */ |
| override size_t toHash() scope const nothrow { return hash(content); } |
| |
| /** |
| * Returns a string representation of this Text section |
| */ |
| override string toString() scope const @safe @nogc pure nothrow { return content; } |
| |
| /** |
| * Returns true if the content is the empty string |
| */ |
| override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; } |
| } |
| |
| /** |
| * Class representing an XML Instruction section |
| */ |
| class XMLInstruction : Item |
| { |
| private string content; |
| |
| /** |
| * Construct an XML Instruction section |
| * |
| * Params: |
| * content = the body of the instruction segment |
| * |
| * Throws: XIException if the segment body is illegal (contains ">") |
| * |
| * Example: |
| * -------------- |
| * auto item = new XMLInstruction("ATTLIST"); |
| * // constructs <!ATTLIST> |
| * -------------- |
| */ |
| this(string content) @safe pure |
| { |
| import std.string : indexOf; |
| if (content.indexOf(">") != -1) throw new XIException(content); |
| this.content = content; |
| } |
| |
| /** |
| * Compares two XML instructions for equality |
| * |
| * Example: |
| * -------------- |
| * XMLInstruction item1,item2; |
| * if (item1 == item2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const XMLInstruction) item; |
| return t !is null && content == t.content; |
| } |
| |
| /** |
| * Compares two XML instructions |
| * |
| * You should rarely need to call this function. It exists so that |
| * XmlInstructions can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * XMLInstruction item1,item2; |
| * if (item1 < item2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const XMLInstruction) item; |
| return t !is null |
| && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); |
| } |
| |
| /** |
| * Returns the hash of an XMLInstruction |
| * |
| * You should rarely need to call this function. It exists so that |
| * XmlInstructions can be used as associative array keys. |
| */ |
| override size_t toHash() scope const nothrow { return hash(content); } |
| |
| /** |
| * Returns a string representation of this XmlInstruction |
| */ |
| override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; } |
| |
| override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always |
| } |
| |
| /** |
| * Class representing a Processing Instruction section |
| */ |
| class ProcessingInstruction : Item |
| { |
| private string content; |
| |
| /** |
| * Construct a Processing Instruction section |
| * |
| * Params: |
| * content = the body of the instruction segment |
| * |
| * Throws: PIException if the segment body is illegal (contains "?>") |
| * |
| * Example: |
| * -------------- |
| * auto item = new ProcessingInstruction("php"); |
| * // constructs <?php?> |
| * -------------- |
| */ |
| this(string content) @safe pure |
| { |
| import std.string : indexOf; |
| if (content.indexOf("?>") != -1) throw new PIException(content); |
| this.content = content; |
| } |
| |
| /** |
| * Compares two processing instructions for equality |
| * |
| * Example: |
| * -------------- |
| * ProcessingInstruction item1,item2; |
| * if (item1 == item2) { } |
| * -------------- |
| */ |
| override bool opEquals(scope const Object o) const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const ProcessingInstruction) item; |
| return t !is null && content == t.content; |
| } |
| |
| /** |
| * Compares two processing instructions |
| * |
| * You should rarely need to call this function. It exists so that |
| * ProcessingInstructions can be used as associative array keys. |
| * |
| * Example: |
| * -------------- |
| * ProcessingInstruction item1,item2; |
| * if (item1 < item2) { } |
| * -------------- |
| */ |
| override int opCmp(scope const Object o) scope const |
| { |
| const item = toType!(const Item)(o); |
| const t = cast(const ProcessingInstruction) item; |
| return t !is null |
| && (content != t.content ? (content < t.content ? -1 : 1 ) : 0 ); |
| } |
| |
| /** |
| * Returns the hash of a ProcessingInstruction |
| * |
| * You should rarely need to call this function. It exists so that |
| * ProcessingInstructions can be used as associative array keys. |
| */ |
| override size_t toHash() scope const nothrow { return hash(content); } |
| |
| /** |
| * Returns a string representation of this ProcessingInstruction |
| */ |
| override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; } |
| |
| override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always |
| } |
| |
| /** |
| * Abstract base class for XML items |
| */ |
| abstract class Item |
| { |
| /// Compares with another Item of same type for equality |
| abstract override bool opEquals(scope const Object o) @safe const; |
| |
| /// Compares with another Item of same type |
| abstract override int opCmp(scope const Object o) @safe const; |
| |
| /// Returns the hash of this item |
| abstract override size_t toHash() @safe scope const; |
| |
| /// Returns a string representation of this item |
| abstract override string toString() @safe scope const; |
| |
| /** |
| * Returns an indented string representation of this item |
| * |
| * Params: |
| * indent = number of spaces by which to indent child elements |
| */ |
| string[] pretty(uint indent) @safe scope const |
| { |
| import std.string : strip; |
| string s = strip(toString()); |
| return s.length == 0 ? [] : [ s ]; |
| } |
| |
| /// Returns true if the item represents empty XML text |
| abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const; |
| } |
| |
| /** |
| * Class for parsing an XML Document. |
| * |
| * This is a subclass of ElementParser. Most of the useful functions are |
| * documented there. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Bugs: |
| * Currently only supports UTF documents. |
| * |
| * If there is an encoding attribute in the prolog, it is ignored. |
| * |
| */ |
| class DocumentParser : ElementParser |
| { |
| string xmlText; |
| |
| /** |
| * Constructs a DocumentParser. |
| * |
| * The input to this function MUST be valid XML. |
| * This is enforced by the function's in contract. |
| * |
| * Params: |
| * xmlText_ = the entire XML document as text |
| * |
| */ |
| this(string xmlText_) |
| in |
| { |
| assert(xmlText_.length != 0); |
| try |
| { |
| // Confirm that the input is valid XML |
| check(xmlText_); |
| } |
| catch (CheckException e) |
| { |
| // And if it's not, tell the user why not |
| assert(false, "\n" ~ e.toString()); |
| } |
| } |
| body |
| { |
| xmlText = xmlText_; |
| s = &xmlText; |
| super(); // Initialize everything |
| parse(); // Parse through the root tag (but not beyond) |
| } |
| } |
| |
| @system unittest |
| { |
| auto doc = new Document("<root><child><grandchild/></child></root>"); |
| assert(doc.elements.length == 1); |
| assert(doc.elements[0].tag.name == "child"); |
| assert(doc.items == doc.elements); |
| } |
| |
| /** |
| * Class for parsing an XML element. |
| * |
| * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) |
| * |
| * Note that you cannot construct instances of this class directly. You can |
| * construct a DocumentParser (which is a subclass of ElementParser), but |
| * otherwise, Instances of ElementParser will be created for you by the |
| * library, and passed your way via onStartTag handlers. |
| * |
| */ |
| class ElementParser |
| { |
| alias Handler = void delegate(string); |
| alias ElementHandler = void delegate(in Element element); |
| alias ParserHandler = void delegate(ElementParser parser); |
| |
| private |
| { |
| Tag tag_; |
| string elementStart; |
| string* s; |
| |
| Handler commentHandler = null; |
| Handler cdataHandler = null; |
| Handler xiHandler = null; |
| Handler piHandler = null; |
| Handler rawTextHandler = null; |
| Handler textHandler = null; |
| |
| // Private constructor for start tags |
| this(ElementParser parent) @safe @nogc pure nothrow |
| { |
| s = parent.s; |
| this(); |
| tag_ = parent.tag_; |
| } |
| |
| // Private constructor for empty tags |
| this(Tag tag, string* t) @safe @nogc pure nothrow |
| { |
| s = t; |
| this(); |
| tag_ = tag; |
| } |
| } |
| |
| /** |
| * The Tag at the start of the element being parsed. You can read this to |
| * determine the tag's name and attributes. |
| */ |
| @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; } |
| |
| /** |
| * Register a handler which will be called whenever a start tag is |
| * encountered which matches the specified name. You can also pass null as |
| * the name, in which case the handler will be called for any unmatched |
| * start tag. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever a <podcast> start tag is encountered |
| * onStartTag["podcast"] = (ElementParser xml) |
| * { |
| * // Your code here |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * |
| * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode> |
| * // start tag is encountered |
| * onStartTag["episode"] = &myEpisodeStartHandler; |
| * |
| * // call delegate dg for all other start tags |
| * onStartTag[null] = dg; |
| * -------------- |
| * |
| * This library will supply your function with a new instance of |
| * ElementHandler, which may be used to parse inside the element whose |
| * start tag was just found, or to identify the tag attributes of the |
| * element, etc. |
| * |
| * Note that your function will be called for both start tags and empty |
| * tags. That is, we make no distinction between <br></br> |
| * and <br/>. |
| */ |
| ParserHandler[string] onStartTag; |
| |
| /** |
| * Register a handler which will be called whenever an end tag is |
| * encountered which matches the specified name. You can also pass null as |
| * the name, in which case the handler will be called for any unmatched |
| * end tag. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever a </podcast> end tag is encountered |
| * onEndTag["podcast"] = (in Element e) |
| * { |
| * // Your code here |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * |
| * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode> |
| * // end tag is encountered |
| * onEndTag["episode"] = &myEpisodeEndHandler; |
| * |
| * // call delegate dg for all other end tags |
| * onEndTag[null] = dg; |
| * -------------- |
| * |
| * Note that your function will be called for both start tags and empty |
| * tags. That is, we make no distinction between <br></br> |
| * and <br/>. |
| */ |
| ElementHandler[string] onEndTag; |
| |
| protected this() @safe @nogc pure nothrow |
| { |
| elementStart = *s; |
| } |
| |
| /** |
| * Register a handler which will be called whenever text is encountered. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever text is encountered |
| * onText = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s will have been decoded by the time you see |
| * // it, and so may contain any character. |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; } |
| |
| /** |
| * Register an alternative handler which will be called whenever text |
| * is encountered. This differs from onText in that onText will decode |
| * the text, whereas onTextRaw will not. This allows you to make design |
| * choices, since onText will be more accurate, but slower, while |
| * onTextRaw will be faster, but less accurate. Of course, you can |
| * still call decode() within your handler, if you want, but you'd |
| * probably want to use onTextRaw only in circumstances where you |
| * know that decoding is unnecessary. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever text is encountered |
| * onText = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s will NOT have been decoded. |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; } |
| |
| /** |
| * Register a handler which will be called whenever a character data |
| * segment is encountered. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever a CData section is encountered |
| * onCData = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s does not include the opening <![CDATA[ |
| * // nor closing ]]> |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; } |
| |
| /** |
| * Register a handler which will be called whenever a comment is |
| * encountered. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever a comment is encountered |
| * onComment = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s does not include the opening <!-- nor |
| * // closing --> |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; } |
| |
| /** |
| * Register a handler which will be called whenever a processing |
| * instruction is encountered. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever a processing instruction is encountered |
| * onPI = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s does not include the opening <? nor |
| * // closing ?> |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; } |
| |
| /** |
| * Register a handler which will be called whenever an XML instruction is |
| * encountered. |
| * |
| * Example: |
| * -------------- |
| * // Call this function whenever an XML instruction is encountered |
| * // (Note: XML instructions may only occur preceding the root tag of a |
| * // document). |
| * onPI = (string s) |
| * { |
| * // Your code here |
| * |
| * // The passed parameter s does not include the opening <! nor |
| * // closing > |
| * // |
| * // This is a a closure, so code here may reference |
| * // variables which are outside of this scope |
| * }; |
| * -------------- |
| */ |
| @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; } |
| |
| /** |
| * Parse an XML element. |
| * |
| * Parsing will continue until the end of the current element. Any items |
| * encountered for which a handler has been registered will invoke that |
| * handler. |
| * |
| * Throws: various kinds of XMLException |
| */ |
| void parse() |
| { |
| import std.algorithm.searching : startsWith; |
| import std.string : indexOf; |
| |
| string t; |
| const Tag root = tag_; |
| Tag[string] startTags; |
| if (tag_ !is null) startTags[tag_.name] = tag_; |
| |
| while (s.length != 0) |
| { |
| if (startsWith(*s,"<!--")) |
| { |
| chop(*s,4); |
| t = chop(*s,indexOf(*s,"-->")); |
| if (commentHandler.funcptr !is null) commentHandler(t); |
| chop(*s,3); |
| } |
| else if (startsWith(*s,"<![CDATA[")) |
| { |
| chop(*s,9); |
| t = chop(*s,indexOf(*s,"]]>")); |
| if (cdataHandler.funcptr !is null) cdataHandler(t); |
| chop(*s,3); |
| } |
| else if (startsWith(*s,"<!")) |
| { |
| chop(*s,2); |
| t = chop(*s,indexOf(*s,">")); |
| if (xiHandler.funcptr !is null) xiHandler(t); |
| chop(*s,1); |
| } |
| else if (startsWith(*s,"<?")) |
| { |
| chop(*s,2); |
| t = chop(*s,indexOf(*s,"?>")); |
| if (piHandler.funcptr !is null) piHandler(t); |
| chop(*s,2); |
| } |
| else if (startsWith(*s,"<")) |
| { |
| tag_ = new Tag(*s,true); |
| if (root is null) |
| return; // Return to constructor of derived class |
| |
| if (tag_.isStart) |
| { |
| startTags[tag_.name] = tag_; |
| |
| auto parser = new ElementParser(this); |
| |
| auto handler = tag_.name in onStartTag; |
| if (handler !is null) (*handler)(parser); |
| else |
| { |
| handler = null in onStartTag; |
| if (handler !is null) (*handler)(parser); |
| } |
| } |
| else if (tag_.isEnd) |
| { |
| const startTag = startTags[tag_.name]; |
| string text; |
| |
| if (startTag.tagString.length == 0) |
| assert(0); |
| |
| immutable(char)* p = startTag.tagString.ptr |
| + startTag.tagString.length; |
| immutable(char)* q = &tag_.tagString[0]; |
| text = decode(p[0..(q-p)], DecodeMode.LOOSE); |
| |
| auto element = new Element(startTag); |
| if (text.length != 0) element ~= new Text(text); |
| |
| auto handler = tag_.name in onEndTag; |
| if (handler !is null) (*handler)(element); |
| else |
| { |
| handler = null in onEndTag; |
| if (handler !is null) (*handler)(element); |
| } |
| |
| if (tag_.name == root.name) return; |
| } |
| else if (tag_.isEmpty) |
| { |
| Tag startTag = new Tag(tag_.name); |
| |
| // FIX by hed010gy, for bug 2979 |
| // http://d.puremagic.com/issues/show_bug.cgi?id=2979 |
| if (tag_.attr.length > 0) |
| foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv; |
| // END FIX |
| |
| // Handle the pretend start tag |
| string s2; |
| auto parser = new ElementParser(startTag,&s2); |
| auto handler1 = startTag.name in onStartTag; |
| if (handler1 !is null) (*handler1)(parser); |
| else |
| { |
| handler1 = null in onStartTag; |
| if (handler1 !is null) (*handler1)(parser); |
| } |
| |
| // Handle the pretend end tag |
| auto element = new Element(startTag); |
| auto handler2 = tag_.name in onEndTag; |
| if (handler2 !is null) (*handler2)(element); |
| else |
| { |
| handler2 = null in onEndTag; |
| if (handler2 !is null) (*handler2)(element); |
| } |
| } |
| } |
| else |
| { |
| t = chop(*s,indexOf(*s,"<")); |
| if (rawTextHandler.funcptr !is null) |
| rawTextHandler(t); |
| else if (textHandler.funcptr !is null) |
| textHandler(decode(t,DecodeMode.LOOSE)); |
| } |
| } |
| } |
| |
| /** |
| * Returns that part of the element which has already been parsed |
| */ |
| override string toString() const @nogc @safe pure nothrow |
| { |
| assert(elementStart.length >= s.length); |
| return elementStart[0 .. elementStart.length - s.length]; |
| } |
| |
| } |
| |
| private |
| { |
| template Check(string msg) |
| { |
| string old = s; |
| |
| void fail() @safe pure |
| { |
| s = old; |
| throw new Err(s,msg); |
| } |
| |
| void fail(Err e) @safe pure |
| { |
| s = old; |
| throw new Err(s,msg,e); |
| } |
| |
| void fail(string msg2) @safe pure |
| { |
| fail(new Err(s,msg2)); |
| } |
| } |
| |
| void checkMisc(ref string s) @safe pure // rule 27 |
| { |
| import std.algorithm.searching : startsWith; |
| |
| mixin Check!("Misc"); |
| |
| try |
| { |
| if (s.startsWith("<!--")) { checkComment(s); } |
| else if (s.startsWith("<?")) { checkPI(s); } |
| else { checkSpace(s); } |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkDocument(ref string s) @safe pure // rule 1 |
| { |
| mixin Check!("Document"); |
| try |
| { |
| checkProlog(s); |
| checkElement(s); |
| star!(checkMisc)(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkChars(ref string s) @safe pure // rule 2 |
| { |
| // TO DO - Fix std.utf stride and decode functions, then use those |
| // instead |
| import std.format : format; |
| |
| mixin Check!("Chars"); |
| |
| dchar c; |
| ptrdiff_t n = -1; |
| // 'i' must not be smaller than size_t because size_t is used internally in |
| // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets. |
| foreach (size_t i, dchar d; s) |
| { |
| if (!isChar(d)) |
| { |
| c = d; |
| n = i; |
| break; |
| } |
| } |
| if (n != -1) |
| { |
| s = s[n..$]; |
| fail(format("invalid character: U+%04X",c)); |
| } |
| } |
| |
| void checkSpace(ref string s) @safe pure // rule 3 |
| { |
| import std.algorithm.searching : countUntil; |
| import std.ascii : isWhite; |
| import std.utf : byCodeUnit; |
| |
| mixin Check!("Whitespace"); |
| ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a)); |
| if (i == -1 && s.length > 0 && isWhite(s[0])) |
| s = s[$ .. $]; |
| else if (i > -1) |
| s = s[i .. $]; |
| if (s is old) fail(); |
| } |
| |
| void checkName(ref string s, out string name) @safe pure // rule 5 |
| { |
| mixin Check!("Name"); |
| |
| if (s.length == 0) fail(); |
| ptrdiff_t n; |
| // 'i' must not be smaller than size_t because size_t is used internally in |
| // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets. |
| foreach (size_t i, dchar c; s) |
| { |
| if (c == '_' || c == ':' || isLetter(c)) continue; |
| if (i == 0) fail(); |
| if (c == '-' || c == '.' || isDigit(c) |
| || isCombiningChar(c) || isExtender(c)) continue; |
| n = i; |
| break; |
| } |
| name = s[0 .. n]; |
| s = s[n..$]; |
| } |
| |
| void checkAttValue(ref string s) @safe pure // rule 10 |
| { |
| import std.algorithm.searching : countUntil; |
| import std.utf : byCodeUnit; |
| |
| mixin Check!("AttValue"); |
| |
| if (s.length == 0) fail(); |
| char c = s[0]; |
| if (c != '\u0022' && c != '\u0027') |
| fail("attribute value requires quotes"); |
| s = s[1..$]; |
| for (;;) |
| { |
| s = s[s.byCodeUnit.countUntil(c) .. $]; |
| if (s.length == 0) fail("unterminated attribute value"); |
| if (s[0] == '<') fail("< found in attribute value"); |
| if (s[0] == c) break; |
| try { checkReference(s); } catch (Err e) { fail(e); } |
| } |
| s = s[1..$]; |
| } |
| |
| void checkCharData(ref string s) @safe pure // rule 14 |
| { |
| import std.algorithm.searching : startsWith; |
| |
| mixin Check!("CharData"); |
| |
| while (s.length != 0) |
| { |
| if (s.startsWith("&")) break; |
| if (s.startsWith("<")) break; |
| if (s.startsWith("]]>")) fail("]]> found within char data"); |
| s = s[1..$]; |
| } |
| } |
| |
| void checkComment(ref string s) @safe pure // rule 15 |
| { |
| import std.string : indexOf; |
| |
| mixin Check!("Comment"); |
| |
| try { checkLiteral("<!--",s); } catch (Err e) { fail(e); } |
| ptrdiff_t n = s.indexOf("--"); |
| if (n == -1) fail("unterminated comment"); |
| s = s[n..$]; |
| try { checkLiteral("-->",s); } catch (Err e) { fail(e); } |
| } |
| |
| void checkPI(ref string s) @safe pure // rule 16 |
| { |
| mixin Check!("PI"); |
| |
| try |
| { |
| checkLiteral("<?",s); |
| checkEnd("?>",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkCDSect(ref string s) @safe pure // rule 18 |
| { |
| mixin Check!("CDSect"); |
| |
| try |
| { |
| checkLiteral(cdata,s); |
| checkEnd("]]>",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkProlog(ref string s) @safe pure // rule 22 |
| { |
| mixin Check!("Prolog"); |
| |
| try |
| { |
| /* The XML declaration is optional |
| * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog |
| */ |
| opt!(checkXMLDecl)(s); |
| |
| star!(checkMisc)(s); |
| opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkXMLDecl(ref string s) @safe pure // rule 23 |
| { |
| mixin Check!("XMLDecl"); |
| |
| try |
| { |
| checkLiteral("<?xml",s); |
| checkVersionInfo(s); |
| opt!(checkEncodingDecl)(s); |
| opt!(checkSDDecl)(s); |
| opt!(checkSpace)(s); |
| checkLiteral("?>",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkVersionInfo(ref string s) @safe pure // rule 24 |
| { |
| mixin Check!("VersionInfo"); |
| |
| try |
| { |
| checkSpace(s); |
| checkLiteral("version",s); |
| checkEq(s); |
| quoted!(checkVersionNum)(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkEq(ref string s) @safe pure // rule 25 |
| { |
| mixin Check!("Eq"); |
| |
| try |
| { |
| opt!(checkSpace)(s); |
| checkLiteral("=",s); |
| opt!(checkSpace)(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkVersionNum(ref string s) @safe pure // rule 26 |
| { |
| import std.algorithm.searching : countUntil; |
| import std.utf : byCodeUnit; |
| |
| mixin Check!("VersionNum"); |
| |
| s = s[s.byCodeUnit.countUntil('\"') .. $]; |
| if (s is old) fail(); |
| } |
| |
| void checkDocTypeDecl(ref string s) @safe pure // rule 28 |
| { |
| mixin Check!("DocTypeDecl"); |
| |
| try |
| { |
| checkLiteral("<!DOCTYPE",s); |
| // |
| // TO DO -- ensure DOCTYPE is well formed |
| // (But not yet. That's one of our "future directions") |
| // |
| checkEnd(">",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkSDDecl(ref string s) @safe pure // rule 32 |
| { |
| import std.algorithm.searching : startsWith; |
| |
| mixin Check!("SDDecl"); |
| |
| try |
| { |
| checkSpace(s); |
| checkLiteral("standalone",s); |
| checkEq(s); |
| } |
| catch (Err e) { fail(e); } |
| |
| int n = 0; |
| if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5; |
| else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4; |
| else fail("standalone attribute value must be 'yes', \"yes\","~ |
| " 'no' or \"no\""); |
| s = s[n..$]; |
| } |
| |
| void checkElement(ref string s) @safe pure // rule 39 |
| { |
| mixin Check!("Element"); |
| |
| string sname,ename,t; |
| try { checkTag(s,t,sname); } catch (Err e) { fail(e); } |
| |
| if (t == "STag") |
| { |
| try |
| { |
| checkContent(s); |
| t = s; |
| checkETag(s,ename); |
| } |
| catch (Err e) { fail(e); } |
| |
| if (sname != ename) |
| { |
| s = t; |
| fail("end tag name \"" ~ ename |
| ~ "\" differs from start tag name \""~sname~"\""); |
| } |
| } |
| } |
| |
| // rules 40 and 44 |
| void checkTag(ref string s, out string type, out string name) @safe pure |
| { |
| mixin Check!("Tag"); |
| |
| try |
| { |
| type = "STag"; |
| checkLiteral("<",s); |
| checkName(s,name); |
| star!(seq!(checkSpace,checkAttribute))(s); |
| opt!(checkSpace)(s); |
| if (s.length != 0 && s[0] == '/') |
| { |
| s = s[1..$]; |
| type = "ETag"; |
| } |
| checkLiteral(">",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkAttribute(ref string s) @safe pure // rule 41 |
| { |
| mixin Check!("Attribute"); |
| |
| try |
| { |
| string name; |
| checkName(s,name); |
| checkEq(s); |
| checkAttValue(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkETag(ref string s, out string name) @safe pure // rule 42 |
| { |
| mixin Check!("ETag"); |
| |
| try |
| { |
| checkLiteral("</",s); |
| checkName(s,name); |
| opt!(checkSpace)(s); |
| checkLiteral(">",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkContent(ref string s) @safe pure // rule 43 |
| { |
| import std.algorithm.searching : startsWith; |
| |
| mixin Check!("Content"); |
| |
| try |
| { |
| while (s.length != 0) |
| { |
| old = s; |
| if (s.startsWith("&")) { checkReference(s); } |
| else if (s.startsWith("<!--")) { checkComment(s); } |
| else if (s.startsWith("<?")) { checkPI(s); } |
| else if (s.startsWith(cdata)) { checkCDSect(s); } |
| else if (s.startsWith("</")) { break; } |
| else if (s.startsWith("<")) { checkElement(s); } |
| else { checkCharData(s); } |
| } |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkCharRef(ref string s, out dchar c) @safe pure // rule 66 |
| { |
| import std.format : format; |
| |
| mixin Check!("CharRef"); |
| |
| c = 0; |
| try { checkLiteral("&#",s); } catch (Err e) { fail(e); } |
| int radix = 10; |
| if (s.length != 0 && s[0] == 'x') |
| { |
| s = s[1..$]; |
| radix = 16; |
| } |
| if (s.length == 0) fail("unterminated character reference"); |
| if (s[0] == ';') |
| fail("character reference must have at least one digit"); |
| while (s.length != 0) |
| { |
| immutable char d = s[0]; |
| int n = 0; |
| switch (d) |
| { |
| case 'F','f': ++n; goto case; |
| case 'E','e': ++n; goto case; |
| case 'D','d': ++n; goto case; |
| case 'C','c': ++n; goto case; |
| case 'B','b': ++n; goto case; |
| case 'A','a': ++n; goto case; |
| case '9': ++n; goto case; |
| case '8': ++n; goto case; |
| case '7': ++n; goto case; |
| case '6': ++n; goto case; |
| case '5': ++n; goto case; |
| case '4': ++n; goto case; |
| case '3': ++n; goto case; |
| case '2': ++n; goto case; |
| case '1': ++n; goto case; |
| case '0': break; |
| default: n = 100; break; |
| } |
| if (n >= radix) break; |
| c *= radix; |
| c += n; |
| s = s[1..$]; |
| } |
| if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); |
| if (s.length == 0 || s[0] != ';') fail("expected ;"); |
| else s = s[1..$]; |
| } |
| |
| void checkReference(ref string s) @safe pure // rule 67 |
| { |
| import std.algorithm.searching : startsWith; |
| |
| mixin Check!("Reference"); |
| |
| try |
| { |
| dchar c; |
| if (s.startsWith("&#")) checkCharRef(s,c); |
| else checkEntityRef(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkEntityRef(ref string s) @safe pure // rule 68 |
| { |
| mixin Check!("EntityRef"); |
| |
| try |
| { |
| string name; |
| checkLiteral("&",s); |
| checkName(s,name); |
| checkLiteral(";",s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| void checkEncName(ref string s) @safe pure // rule 81 |
| { |
| import std.algorithm.searching : countUntil; |
| import std.ascii : isAlpha; |
| import std.utf : byCodeUnit; |
| |
| mixin Check!("EncName"); |
| |
| s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; |
| if (s is old) fail(); |
| s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; |
| } |
| |
| void checkEncodingDecl(ref string s) @safe pure // rule 80 |
| { |
| mixin Check!("EncodingDecl"); |
| |
| try |
| { |
| checkSpace(s); |
| checkLiteral("encoding",s); |
| checkEq(s); |
| quoted!(checkEncName)(s); |
| } |
| catch (Err e) { fail(e); } |
| } |
| |
| // Helper functions |
| |
| void checkLiteral(string literal,ref string s) @safe pure |
| { |
| import std.string : startsWith; |
| |
| mixin Check!("Literal"); |
| |
| if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); |
| s = s[literal.length..$]; |
| } |
| |
| void checkEnd(string end,ref string s) @safe pure |
| { |
| import std.string : indexOf; |
| // Deliberately no mixin Check here. |
| |
| auto n = s.indexOf(end); |
| if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); |
| s = s[n..$]; |
| checkLiteral(end,s); |
| } |
| |
| // Metafunctions -- none of these use mixin Check |
| |
| void opt(alias f)(ref string s) |
| { |
| try { f(s); } catch (Err e) {} |
| } |
| |
| void plus(alias f)(ref string s) |
| { |
| f(s); |
| star!(f)(s); |
| } |
| |
| void star(alias f)(ref string s) |
| { |
| while (s.length != 0) |
| { |
| try { f(s); } |
| catch (Err e) { return; } |
| } |
| } |
| |
| void quoted(alias f)(ref string s) |
| { |
| import std.string : startsWith; |
| |
| if (s.startsWith("'")) |
| { |
| checkLiteral("'",s); |
| f(s); |
| checkLiteral("'",s); |
| } |
| else |
| { |
| checkLiteral("\"",s); |
| f(s); |
| checkLiteral("\"",s); |
| } |
| } |
| |
| void seq(alias f,alias g)(ref string s) |
| { |
| f(s); |
| g(s); |
| } |
| } |
| |
| /** |
| * Check an entire XML document for well-formedness |
| * |
| * Params: |
| * s = the document to be checked, passed as a string |
| * |
| * Throws: CheckException if the document is not well formed |
| * |
| * CheckException's toString() method will yield the complete hierarchy of |
| * parse failure (the XML equivalent of a stack trace), giving the line and |
| * column number of every failure at every level. |
| */ |
| void check(string s) @safe pure |
| { |
| try |
| { |
| checkChars(s); |
| checkDocument(s); |
| if (s.length != 0) throw new Err(s,"Junk found after document"); |
| } |
| catch (Err e) |
| { |
| e.complete(s); |
| throw e; |
| } |
| } |
| |
| @system pure unittest |
| { |
| import std.string : indexOf; |
| |
| try |
| { |
| check(q"[<?xml version="1.0"?> |
| <catalog> |
| <book id="bk101"> |
| <author>Gambardella, Matthew</author> |
| <title>XML Developer's Guide</title> |
| <genre>Computer</genre> |
| <price>44.95</price> |
| <publish_date>2000-10-01</publish_date> |
| <description>An in-depth look at creating applications |
| with XML.</description> |
| </book> |
| <book id="bk102"> |
| <author>Ralls, Kim</author> |
| <title>Midnight Rain</title> |
| <genre>Fantasy</genres> |
| <price>5.95</price> |
| <publish_date>2000-12-16</publish_date> |
| <description>A former architect battles corporate zombies, |
| an evil sorceress, and her own childhood to become queen |
| of the world.</description> |
| </book> |
| <book id="bk103"> |
| <author>Corets, Eva</author> |
| <title>Maeve Ascendant</title> |
| <genre>Fantasy</genre> |
| <price>5.95</price> |
| <publish_date>2000-11-17</publish_date> |
| <description>After the collapse of a nanotechnology |
| society in England, the young survivors lay the |
| foundation for a new society.</description> |
| </book> |
| </catalog> |
| ]"); |
| assert(false); |
| } |
| catch (CheckException e) |
| { |
| auto n = e.toString().indexOf("end tag name \"genres\" differs"~ |
| " from start tag name \"genre\""); |
| assert(n != -1); |
| } |
| } |
| |
| @system unittest |
| { |
| string s = q"EOS |
| <?xml version="1.0"?> |
| <set> |
| <one>A</one> |
| <!-- comment --> |
| <two>B</two> |
| </set> |
| EOS"; |
| try |
| { |
| check(s); |
| } |
| catch (CheckException e) |
| { |
| assert(0, e.toString()); |
| } |
| } |
| |
| @system unittest |
| { |
| string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream |
| xmlns:stream="http://etherx.'jabber'.org/streams" |
| xmlns="jabber:'client'" from='jid.pl' id="587a5767" |
| xml:lang="en" version="1.0" attr='a"b"c'> |
| </stream:stream></r>`; |
| |
| DocumentParser parser = new DocumentParser(test_xml); |
| bool tested = false; |
| parser.onStartTag["stream:stream"] = (ElementParser p) { |
| assert(p.tag.attr["xmlns"] == "jabber:'client'"); |
| assert(p.tag.attr["from"] == "jid.pl"); |
| assert(p.tag.attr["attr"] == "a\"b\"c"); |
| tested = true; |
| }; |
| parser.parse(); |
| assert(tested); |
| } |
| |
| @system unittest |
| { |
| string s = q"EOS |
| <?xml version="1.0" encoding="utf-8"?> <Tests> |
| <Test thing="What & Up">What & Up Second</Test> |
| </Tests> |
| EOS"; |
| auto xml = new DocumentParser(s); |
| |
| xml.onStartTag["Test"] = (ElementParser xml) { |
| assert(xml.tag.attr["thing"] == "What & Up"); |
| }; |
| |
| xml.onEndTag["Test"] = (in Element e) { |
| assert(e.text() == "What & Up Second"); |
| }; |
| xml.parse(); |
| } |
| |
| @system unittest |
| { |
| string s = `<tag attr=""value>" />`; |
| auto doc = new Document(s); |
| assert(doc.toString() == s); |
| } |
| |
| /** The base class for exceptions thrown by this module */ |
| class XMLException : Exception { this(string msg) @safe pure { super(msg); } } |
| |
| // Other exceptions |
| |
| /// Thrown during Comment constructor |
| class CommentException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown during CData constructor |
| class CDataException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown during XMLInstruction constructor |
| class XIException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown during ProcessingInstruction constructor |
| class PIException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown during Text constructor |
| class TextException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown during decode() |
| class DecodeException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown if comparing with wrong type |
| class InvalidTypeException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /// Thrown when parsing for Tags |
| class TagException : XMLException |
| { private this(string msg) @safe pure { super(msg); } } |
| |
| /** |
| * Thrown during check() |
| */ |
| class CheckException : XMLException |
| { |
| CheckException err; /// Parent in hierarchy |
| private string tail; |
| /** |
| * Name of production rule which failed to parse, |
| * or specific error message |
| */ |
| string msg; |
| size_t line = 0; /// Line number at which parse failure occurred |
| size_t column = 0; /// Column number at which parse failure occurred |
| |
| private this(string tail,string msg,Err err=null) @safe pure |
| { |
| super(null); |
| this.tail = tail; |
| this.msg = msg; |
| this.err = err; |
| } |
| |
| private void complete(string entire) @safe pure |
| { |
| import std.string : count, lastIndexOf; |
| import std.utf : toUTF32; |
| |
| string head = entire[0..$-tail.length]; |
| ptrdiff_t n = head.lastIndexOf('\n') + 1; |
| line = head.count("\n") + 1; |
| dstring t = toUTF32(head[n..$]); |
| column = t.length + 1; |
| if (err !is null) err.complete(entire); |
| } |
| |
| override string toString() const @safe pure |
| { |
| import std.format : format; |
| |
| string s; |
| if (line != 0) s = format("Line %d, column %d: ",line,column); |
| s ~= msg; |
| s ~= '\n'; |
| if (err !is null) s = err.toString() ~ s; |
| return s; |
| } |
| } |
| |
| private alias Err = CheckException; |
| |
| // Private helper functions |
| |
| private |
| { |
| inout(T) toType(T)(inout Object o) |
| { |
| T t = cast(T)(o); |
| if (t is null) |
| { |
| throw new InvalidTypeException("Attempt to compare a " |
| ~ T.stringof ~ " with an instance of another type"); |
| } |
| return t; |
| } |
| |
| string chop(ref string s, size_t n) @safe pure nothrow |
| { |
| if (n == -1) n = s.length; |
| string t = s[0 .. n]; |
| s = s[n..$]; |
| return t; |
| } |
| |
| bool optc(ref string s, char c) @safe pure nothrow |
| { |
| immutable bool b = s.length != 0 && s[0] == c; |
| if (b) s = s[1..$]; |
| return b; |
| } |
| |
| void reqc(ref string s, char c) @safe pure |
| { |
| if (s.length == 0 || s[0] != c) throw new TagException(""); |
| s = s[1..$]; |
| } |
| |
| char requireOneOf(ref string s, string chars) @safe pure |
| { |
| import std.string : indexOf; |
| |
| if (s.length == 0 || indexOf(chars,s[0]) == -1) |
| throw new TagException(""); |
| immutable char ch = s[0]; |
| s = s[1..$]; |
| return ch; |
| } |
| |
| size_t hash(string s,size_t h=0) @trusted nothrow |
| { |
| return typeid(s).getHash(&s) + h; |
| } |
| |
| // Definitions from the XML specification |
| immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD, |
| 0x10000,0x10FFFF]; |
| immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8, |
| 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A, |
| 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250, |
| 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E, |
| 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE, |
| 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451, |
| 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0, |
| 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561, |
| 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671, |
| 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5, |
| 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F, |
| 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC, |
| 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13, |
| 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59, |
| 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F, |
| 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD, |
| 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A, |
| 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F, |
| 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C, |
| 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7, |
| 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35, |
| 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA, |
| 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E, |
| 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30, |
| 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87, |
| 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1, |
| 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0, |
| 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49, |
| 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105, |
| 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E, |
| 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154, |
| 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167, |
| 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E, |
| 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA, |
| 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00, |
| 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48, |
| 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F, |
| 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6, |
| 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6, |
| 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041, |
| 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3]; |
| immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5]; |
| immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486, |
| 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2, |
| 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF, |
| 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C, |
| 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983, |
| 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8, |
| 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C, |
| 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D, |
| 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9, |
| 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48, |
| 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8, |
| 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48, |
| 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8, |
| 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48, |
| 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E, |
| 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19, |
| 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F, |
| 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD, |
| 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F, |
| 0x3099,0x3099,0x309A,0x309A]; |
| immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966, |
| 0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7, |
| 0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0, |
| 0x0ED9,0x0F20,0x0F29]; |
| immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387, |
| 0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031, |
| 0x3035,0x309D,0x309E,0x30FC,0x30FE]; |
| |
| bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure |
| { |
| |