| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199 | /******************************************************************************* Copyright: Copyright (C) 2007 Aaron Craelius and Kris Bell All rights reserved. License: BSD style: $(LICENSE) version: Initial release: February 2008 Authors: Aaron, Kris *******************************************************************************/ module tango.text.xml.Document; package import tango.text.xml.PullParser; version(Clear) extern (C) void* memset(void* s, int c, size_t n); version=discrete; /******************************************************************************* Implements a DOM atop the XML parser, supporting document parsing, tree traversal and ad-hoc tree manipulation. The DOM API is non-conformant, yet simple and functional in style - locate a tree node of interest and operate upon or around it. In all cases you will need a document instance to begin, whereupon it may be populated either by parsing an existing document or via API manipulation. This particular DOM employs a simple free-list to allocate each of the tree nodes, making it quite efficient at parsing XML documents. The tradeoff with such a scheme is that copying nodes from one document to another requires a little more care than otherwise. We felt this was a reasonable tradeoff, given the throughput gains vs the relative infrequency of grafting operations. For grafting within or across documents, please use the move() and copy() methods. Another simplification is related to entity transcoding. This is not performed internally, and becomes the responsibility of the client. That is, the client should perform appropriate entity transcoding as necessary. Paying the (high) transcoding cost for all documents doesn't seem appropriate. Parse example --- auto doc = new Document!(char); doc.parse (content); auto print = new DocPrinter!(char); Stdout(print(doc)).newline; --- API example --- auto doc = new Document!(char); // attach an xml header doc.header; // attach an element with some attributes, plus // a child element with an attached data value doc.tree.element (null, "element") .attribute (null, "attrib1", "value") .attribute (null, "attrib2") .element (null, "child", "value"); auto print = new DocPrinter!(char); Stdout(print(doc)).newline; --- Note that the document tree() includes all nodes in the tree, and not just elements. Use doc.elements to address the topmost element instead. For example, adding an interior sibling to the prior illustration --- doc.elements.element (null, "sibling"); --- Printing the name of the topmost (root) element: --- Stdout.formatln ("first element is '{}'", doc.elements.name); --- XPath examples: --- auto doc = new Document!(char); // attach an element with some attributes, plus // a child element with an attached data value doc.tree.element (null, "element") .attribute (null, "attrib1", "value") .attribute (null, "attrib2") .element (null, "child", "value"); // select named-elements auto set = doc.query["element"]["child"]; // select all attributes named "attrib1" set = doc.query.descendant.attribute("attrib1"); // select elements with one parent and a matching text value set = doc.query[].filter((doc.Node n) {return n.children.hasData("value");}); --- Note that path queries are temporal - they do not retain content across mulitple queries. That is, the lifetime of a query result is limited unless you explicitly copy it. For example, this will fail --- auto elements = doc.query["element"]; auto children = elements["child"]; --- The above will lose elements because the associated document reuses node space for subsequent queries. In order to retain results, do this --- auto elements = doc.query["element"].dup; auto children = elements["child"]; --- The above .dup is generally very small (a set of pointers only). On the other hand, recursive queries are fully supported --- set = doc.query[].filter((doc.Node n) {return n.query[].count > 1;}); --- Typical usage tends to follow the following pattern, Where each query result is processed before another is initiated --- foreach (node; doc.query.child("element")) { // do something with each node } --- Note that the parser is templated for char, wchar or dchar. *******************************************************************************/ class Document(T) : PullParser!(T) { public alias NodeImpl* Node; private Node root; private NodeImpl[] list; private NodeImpl[][] lists; private ptrdiff_t index, chunks, freelists; private XmlPath!(T) xpath; /*********************************************************************** Construct a DOM instance. The optional parameter indicates the initial number of nodes assigned to the freelist ***********************************************************************/ this (size_t nodes = 1000) { assert (nodes > 50); super (null); xpath = new XmlPath!(T); chunks = nodes; newlist(); root = allocate(); root.id = XmlNodeType.Document; } /*********************************************************************** Return an xpath handle to query this document. This starts at the document root. See also Node.query ***********************************************************************/ final XmlPath!(T).NodeSet query () { return xpath.start (root); } /*********************************************************************** Return the root document node, from which all other nodes are descended. Returns null where there are no nodes in the document ***********************************************************************/ @property final Node tree () { return root; } /*********************************************************************** Return the topmost element node, which is generally the root of the element tree. Returns null where there are no top-level element nodes ***********************************************************************/ @property final Node elements () { if (root) { auto node = root.lastChild; while (node) if (node.id is XmlNodeType.Element) return node; else node = node.prev; } return null; } /*********************************************************************** Reset the freelist. Subsequent allocation of document nodes will overwrite prior instances. ***********************************************************************/ final Document reset () { root.lastChild = root.firstChild = null; version(Clear) { while (freelists) { auto list = lists[--freelists]; memset (list.ptr, 0, NodeImpl.sizeof * list.length); } } else { freelists = 0; } newlist(); index = 1; version(d) { freelists = 0; // needed to align the codegen! } return this; } /*********************************************************************** Prepend an XML header to the document tree ***********************************************************************/ final Document header (const(T)[] encoding = null) { if (encoding.length) encoding = `xml version="1.0" encoding="`~encoding~`"`; else encoding = `xml version="1.0" encoding="UTF-8"`; root.prepend (root.create(XmlNodeType.PI, encoding)); return this; } /*********************************************************************** Parse the given xml content, which will reuse any existing node within this document. The resultant tree is retrieved via the document 'tree' attribute ***********************************************************************/ final void parse(const(T[]) xml) { assert (xml); reset(); super.reset (xml); auto cur = root; size_t defNamespace; while (true) { auto p = text.point; switch (super.next) { case XmlTokenType.EndElement: case XmlTokenType.EndEmptyElement: assert (cur.host); cur.end = text.point; cur = cur.host; break; case XmlTokenType.Data: version (discrete) { auto node = allocate(); node.rawValue = super.rawValue; node.id = XmlNodeType.Data; cur.append (node); } else { if (cur.rawValue.length is 0) cur.rawValue = super.rawValue; else // multiple data sections cur.data (super.rawValue); } break; case XmlTokenType.StartElement: auto node = allocate(); node.host = cur; node.prefixed = super.prefix; node.id = XmlNodeType.Element; node.localName = super.localName; node.start = p; // inline append if (cur.lastChild) { cur.lastChild.nextSibling = node; node.prevSibling = cur.lastChild; cur.lastChild = node; } else { cur.firstChild = node; cur.lastChild = node; } cur = node; break; case XmlTokenType.Attribute: auto attr = allocate(); attr.prefixed = super.prefix; attr.rawValue = super.rawValue; attr.localName = super.localName; attr.id = XmlNodeType.Attribute; cur.attrib (attr); break; case XmlTokenType.PI: cur.pi_ (super.rawValue, p[0..text.point-p]); break; case XmlTokenType.Comment: cur.comment_ (super.rawValue); break; case XmlTokenType.CData: cur.cdata_ (super.rawValue); break; case XmlTokenType.Doctype: cur.doctype_ (super.rawValue); break; case XmlTokenType.Done: return; default: break; } } } /*********************************************************************** allocate a node from the freelist ***********************************************************************/ private final Node allocate () { if (index >= list.length) newlist(); auto p = &list[index++]; p.doc = this; version(Clear){} else { p.start = p.end = null; p.host = p.prevSibling = p.nextSibling = p.firstChild = p.lastChild = p.firstAttr = p.lastAttr = null; p.rawValue = p.localName = p.prefixed = null; } return p; } /*********************************************************************** allocate a node from the freelist ***********************************************************************/ private final void newlist () { index = 0; if (freelists >= lists.length) { lists.length = lists.length + 1; lists[$-1] = new NodeImpl [chunks]; } list = lists[freelists++]; } /*********************************************************************** foreach support for visiting and selecting nodes. A fruct is a low-overhead mechanism for capturing context relating to an opApply, and we use it here to sweep nodes when testing for various relationships. See Node.attributes and Node.children ***********************************************************************/ private struct Visitor { private Node node; public alias value data; public alias hasValue hasData; /*************************************************************** Is there anything to visit here? Time complexity: O(1) ***************************************************************/ bool exist () { return node != null; } /*************************************************************** traverse sibling nodes ***************************************************************/ int opApply (scope int delegate(ref Node) dg) { int ret; for (auto n=node; n; n = n.nextSibling) if ((ret = dg(n)) != 0) break; return ret; } /*************************************************************** Locate a node with a matching name and/or prefix, and which passes an optional filter. Each of the arguments will be ignored where they are null. Time complexity: O(n) ***************************************************************/ Node name (const(T[]) prefix, const(T[]) local, scope bool delegate(Node) dg=null) { for (auto n=node; n; n = n.nextSibling) { if (local.ptr && local != n.localName) continue; if (prefix.ptr && prefix != n.prefixed) continue; if (dg !is null && dg(n) is false) continue; return n; } return null; } /*************************************************************** Scan nodes for a matching name and/or prefix. Each of the arguments will be ignored where they are null. Time complexity: O(n) ***************************************************************/ bool hasName (const(T[]) prefix, const(T[]) local) { return name (prefix, local) != null; } /*************************************************************** Locate a node with a matching name and/or prefix, and which matches a specified value. Each of the arguments will be ignored where they are null. Time complexity: O(n) ***************************************************************/ version (Filter) { Node value (const(T[]) prefix, const(T[]) local, const(T[]) value) { if (value.ptr) return name (prefix, local, (Node n){return value == n.rawValue;}); return name (prefix, local); } } /*************************************************************** Sweep nodes looking for a match, and returns either a node or null. See value(x,y,z) or name(x,y,z) for additional filtering. Time complexity: O(n) ***************************************************************/ Node value (const(T[]) match) { if (match.ptr) for (auto n=node; n; n = n.nextSibling) if (match == n.rawValue) return n; return null; } /*************************************************************** Sweep the nodes looking for a value match. Returns true if found. See value(x,y,z) or name(x,y,z) for additional filtering. Time complexity: O(n) ***************************************************************/ bool hasValue (const(T[]) match) { return value(match) != null; } } /*********************************************************************** The node implementation ***********************************************************************/ private struct NodeImpl { public void* user; /// open for usage package Document doc; // owning document package XmlNodeType id; // node type package const(T)[] prefixed; // namespace package const(T)[] localName; // name package const(T)[] rawValue; // data value package Node host, // parent node prevSibling, // prior nextSibling, // next firstChild, // head lastChild, // tail firstAttr, // head lastAttr; // tail package const(T)* end, // slice of the ... start; // original xml text /*************************************************************** Return the hosting document ***************************************************************/ @property Document document () { return doc; } /*************************************************************** Return the node type-id ***************************************************************/ @property XmlNodeType type () { return id; } /*************************************************************** Return the parent, which may be null ***************************************************************/ @property Node parent () { return host; } /*************************************************************** Return the first child, which may be null ***************************************************************/ @property Node child () { return firstChild; } /*************************************************************** Return the last child, which may be null Deprecated: exposes too much implementation detail. Please file a ticket if you really need this functionality ***************************************************************/ deprecated Node childTail () { return lastChild; } /*************************************************************** Return the prior sibling, which may be null ***************************************************************/ @property Node prev () { return prevSibling; } /*************************************************************** Return the next sibling, which may be null ***************************************************************/ @property Node next () { return nextSibling; } /*************************************************************** Return the namespace prefix of this node (may be null) ***************************************************************/ @property const(T[]) prefix () { return prefixed; } /*************************************************************** Set the namespace prefix of this node (may be null) ***************************************************************/ @property Node prefix (const(T[]) replace) { prefixed = replace; return &this; } /*************************************************************** Return the vanilla node name (sans prefix) ***************************************************************/ @property const(T[]) name () { return localName; } /*************************************************************** Set the vanilla node name (sans prefix) ***************************************************************/ @property Node name (const(T[]) replace) { localName = replace; return &this; } /*************************************************************** Return the data content, which may be null ***************************************************************/ @property const(T[]) value () { version(discrete) { if (type is XmlNodeType.Element) foreach (child; children) if (child.id is XmlNodeType.Data || child.id is XmlNodeType.CData) return child.rawValue; } return rawValue; } /*************************************************************** Set the raw data content, which may be null ***************************************************************/ @property void value (const(T[]) val) { version(discrete) { if (type is XmlNodeType.Element) foreach (child; children) if (child.id is XmlNodeType.Data) return child.value (val); } rawValue = val; mutate(); } /*************************************************************** Return the full node name, which is a combination of the prefix & local names. Nodes without a prefix will return local-name only ***************************************************************/ const(T[]) toString (T[] output = null) { if (prefixed.length) { auto len = prefixed.length + localName.length + 1; // is the prefix already attached to the name? if (prefixed.ptr + prefixed.length + 1 is localName.ptr && ':' is *(localName.ptr-1)) return prefixed.ptr [0 .. len]; // nope, copy the discrete segments into output if (output.length < len) output.length = len; output[0..prefixed.length] = prefixed; output[prefixed.length] = ':'; output[prefixed.length+1 .. len] = localName; return output[0..len]; } return localName; } /*************************************************************** Return the index of this node, or how many prior siblings it has. Time complexity: O(n) ***************************************************************/ @property size_t position () { auto count = 0; auto prior = prevSibling; while (prior) ++count, prior = prior.prevSibling; return count; } /*************************************************************** Detach this node from its parent and siblings ***************************************************************/ Node detach () { return remove(); } /*************************************************************** Return an xpath handle to query this node See also Document.query ***************************************************************/ final XmlPath!(T).NodeSet query () { return doc.xpath.start (&this); } /*************************************************************** Return a foreach iterator for node children ***************************************************************/ @property Visitor children () { Visitor v = {firstChild}; return v; } /*************************************************************** Return a foreach iterator for node attributes ***************************************************************/ @property Visitor attributes () { Visitor v = {firstAttr}; return v; } /*************************************************************** Returns whether there are attributes present or not Deprecated: use node.attributes.exist instead ***************************************************************/ deprecated bool hasAttributes () { return firstAttr !is null; } /*************************************************************** Returns whether there are children present or nor Deprecated: use node.child or node.children.exist instead ***************************************************************/ deprecated bool hasChildren () { return firstChild !is null; } /*************************************************************** Duplicate the given sub-tree into place as a child of this node. Returns a reference to the subtree ***************************************************************/ Node copy (Node tree) { assert (tree); tree = tree.clone(); tree.migrate (document); if (tree.id is XmlNodeType.Attribute) attrib (tree); else append (tree); return tree; } /*************************************************************** Relocate the given sub-tree into place as a child of this node. Returns a reference to the subtree ***************************************************************/ Node move (Node tree) { tree.detach(); if (tree.doc is doc) { if (tree.id is XmlNodeType.Attribute) attrib (tree); else append (tree); } else tree = copy (tree); return tree; } /*************************************************************** Appends a new (child) Element and returns a reference to it. ***************************************************************/ Node element (const(T[]) prefix, const(T[]) local, const(T[]) value = null) { return element_ (prefix, local, value).mutate(); } /*************************************************************** Attaches an Attribute and returns this, the host ***************************************************************/ Node attribute (const(T[]) prefix, const(T[]) local, const(T[]) value = null) { return attribute_ (prefix, local, value).mutate(); } /*************************************************************** Attaches a Data node and returns this, the host ***************************************************************/ Node data (const(T[]) data) { return data_ (data).mutate(); } /*************************************************************** Attaches a CData node and returns this, the host ***************************************************************/ Node cdata (const(T[]) cdata) { return cdata_ (cdata).mutate(); } /*************************************************************** Attaches a Comment node and returns this, the host ***************************************************************/ Node comment (const(T[]) comment) { return comment_ (comment).mutate(); } /*************************************************************** Attaches a Doctype node and returns this, the host ***************************************************************/ Node doctype (const(T[]) doctype) { return doctype_ (doctype).mutate(); } /*************************************************************** Attaches a PI node and returns this, the host ***************************************************************/ Node pi (const(T[]) pi) { return pi_ (pi, null).mutate(); } /*************************************************************** Attaches a child Element, and returns a reference to the child ***************************************************************/ private Node element_ (const(T[]) prefix, const(T[]) local, const(T[]) value = null) { auto node = create (XmlNodeType.Element, null); append (node.set (prefix, local)); version(discrete) { if (value.length) node.data_ (value); } else { node.rawValue = value; } return node; } /*************************************************************** Attaches an Attribute, and returns the host ***************************************************************/ private Node attribute_ (const(T[]) prefix, const(T[]) local, const(T[]) value = null) { auto node = create (XmlNodeType.Attribute, value); attrib (node.set (prefix, local)); return &this; } /*************************************************************** Attaches a Data node, and returns the host ***************************************************************/ private Node data_ (const(T[]) data) { append (create (XmlNodeType.Data, data)); return &this; } /*************************************************************** Attaches a CData node, and returns the host ***************************************************************/ private Node cdata_ (const(T[]) cdata) { append (create (XmlNodeType.CData, cdata)); return &this; } /*************************************************************** Attaches a Comment node, and returns the host ***************************************************************/ private Node comment_ (const(T[]) comment) { append (create (XmlNodeType.Comment, comment)); return &this; } /*************************************************************** Attaches a PI node, and returns the host ***************************************************************/ private Node pi_ (const(T[]) pi, const(T[]) patch) { append (create(XmlNodeType.PI, pi).patch(patch)); return &this; } /*************************************************************** Attaches a Doctype node, and returns the host ***************************************************************/ private Node doctype_ (const(T[]) doctype) { append (create (XmlNodeType.Doctype, doctype)); return &this; } /*************************************************************** Append an attribute to this node, The given attribute cannot have an existing parent. ***************************************************************/ private void attrib (Node node) { assert (node.parent is null); node.host = &this; if (lastAttr) { lastAttr.nextSibling = node; node.prevSibling = lastAttr; lastAttr = node; } else firstAttr = lastAttr = node; } /*************************************************************** Append a node to this one. The given node cannot have an existing parent. ***************************************************************/ private void append (Node node) { assert (node.parent is null); node.host = &this; if (lastChild) { lastChild.nextSibling = node; node.prevSibling = lastChild; lastChild = node; } else firstChild = lastChild = node; } /*************************************************************** Prepend a node to this one. The given node cannot have an existing parent. ***************************************************************/ private void prepend (Node node) { assert (node.parent is null); node.host = &this; if (firstChild) { firstChild.prevSibling = node; node.nextSibling = firstChild; firstChild = node; } else firstChild = lastChild = node; } /*************************************************************** Configure node values ***************************************************************/ private Node set (const(T[]) prefix, const(T[]) local) { this.localName = local; this.prefixed = prefix; return &this; } /*************************************************************** Creates and returns a child Element node ***************************************************************/ private Node create (XmlNodeType type, const(T[]) value) { auto node = document.allocate(); node.rawValue = value; node.id = type; return node; } /*************************************************************** Detach this node from its parent and siblings ***************************************************************/ private Node remove() { if (! host) return &this; mutate(); if (prevSibling && nextSibling) { prevSibling.nextSibling = nextSibling; nextSibling.prevSibling = prevSibling; prevSibling = null; nextSibling = null; host = null; } else if (nextSibling) { debug assert(host.firstChild == &this); parent.firstChild = nextSibling; nextSibling.prevSibling = null; nextSibling = null; host = null; } else if (type != XmlNodeType.Attribute) { if (prevSibling) { debug assert(host.lastChild == &this); host.lastChild = prevSibling; prevSibling.nextSibling = null; prevSibling = null; host = null; } else { debug assert(host.firstChild == &this); debug assert(host.lastChild == &this); host.firstChild = null; host.lastChild = null; host = null; } } else { if (prevSibling) { debug assert(host.lastAttr == &this); host.lastAttr = prevSibling; prevSibling.nextSibling = null; prevSibling = null; host = null; } else { debug assert(host.firstAttr == &this); debug assert(host.lastAttr == &this); host.firstAttr = null; host.lastAttr = null; host = null; } } return &this; } /*************************************************************** Patch the serialization text, causing DocPrinter to ignore the subtree of this node, and instead emit the provided text as raw XML output. Warning: this function does *not* copy the provided text, and may be removed from future revisions ***************************************************************/ private Node patch (const(T[]) text) { end = text.ptr + text.length; start = text.ptr; return &this; } /*************************************************************** purge serialization cache for this node and its ancestors ***************************************************************/ private Node mutate () { auto node = &this; do { node.end = null; } while ((node = node.host) !is null); return &this; } /*************************************************************** Duplicate a single node ***************************************************************/ @property private Node dup () { return create(type, rawValue.dup).set(prefixed.dup, localName.dup); } /*************************************************************** Duplicate a subtree ***************************************************************/ private Node clone () { auto p = dup; foreach (attr; attributes) p.attrib (attr.dup); foreach (child; children) p.append (child.clone()); return p; } /*************************************************************** Reset the document host for this subtree ***************************************************************/ private void migrate (Document host) { this.doc = host; foreach (attr; attributes) attr.migrate (host); foreach (child; children) child.migrate (host); } } } /******************************************************************************* XPath support Provides support for common XPath axis and filtering functions, via a native-D interface instead of typical interpreted notation. The general idea here is to generate a NodeSet consisting of those tree-nodes which satisfy a filtering function. The direction, or axis, of tree traversal is governed by one of several predefined operations. All methods facilitiate call-chaining, where each step returns a new NodeSet instance to be operated upon. The set of nodes themselves are collected in a freelist, avoiding heap-activity and making good use of D array-slicing facilities. XPath examples --- auto doc = new Document!(char); // attach an element with some attributes, plus // a child element with an attached data value doc.tree.element (null, "element") .attribute (null, "attrib1", "value") .attribute (null, "attrib2") .element (null, "child", "value"); // select named-elements auto set = doc.query["element"]["child"]; // select all attributes named "attrib1" set = doc.query.descendant.attribute("attrib1"); // select elements with one parent and a matching text value set = doc.query[].filter((doc.Node n) {return n.children.hasData("value");}); --- Note that path queries are temporal - they do not retain content across mulitple queries. That is, the lifetime of a query result is limited unless you explicitly copy it. For example, this will fail to operate as one might expect --- auto elements = doc.query["element"]; auto children = elements["child"]; --- The above will lose elements, because the associated document reuses node space for subsequent queries. In order to retain results, do this --- auto elements = doc.query["element"].dup; auto children = elements["child"]; --- The above .dup is generally very small (a set of pointers only). On the other hand, recursive queries are fully supported --- set = doc.query[].filter((doc.Node n) {return n.query[].count > 1;}); --- Typical usage tends to exhibit the following pattern, Where each query result is processed before another is initiated --- foreach (node; doc.query.child("element")) { // do something with each node } --- Supported axis include: --- .child immediate children .parent immediate parent .next following siblings .prev prior siblings .ancestor all parents .descendant all descendants .data text children .cdata cdata children .attribute attribute children --- Each of the above accept an optional string, which is used in an axis-specific way to filter nodes. For instance, a .child("food") will filter <food> child elements. These variants are shortcuts to using a filter to post-process a result. Each of the above also have variants which accept a delegate instead. In general, you traverse an axis and operate upon the results. The operation applied may be another axis traversal, or a filtering step. All steps can be, and generally should be chained together. Filters are implemented via a delegate mechanism --- .filter (bool delegate(Node)) --- Where the delegate returns true if the node passes the filter. An example might be selecting all nodes with a specific attribute --- auto set = doc.query.descendant.filter ( (doc.Node n){return n.attributes.hasName (null, "test");} ); --- Obviously this is not as clean and tidy as true XPath notation, but that can be wrapped atop this API instead. The benefit here is one of raw throughput - important for some applications. Note that every operation returns a discrete result. Methods first() and last() also return a set of one or zero elements. Some language specific extensions are provided for too --- * .child() can be substituted with [] notation instead * [] notation can be used to index a specific element, like .nth() * the .nodes attribute exposes an underlying Node[], which may be sliced or traversed in the usual D manner --- Other (query result) utility methods include --- .dup .first .last .opIndex .nth .count .opApply --- XmlPath itself needs to be a class in order to avoid forward-ref issues. *******************************************************************************/ private class XmlPath(T) { public alias Document!(T) Doc; /// the typed document public alias Doc.Node Node; /// generic document node private Node[] freelist; private size_t freeIndex, markIndex; private size_t recursion; /*********************************************************************** Prime a query Returns a NodeSet containing just the given node, which can then be used to cascade results into subsequent NodeSet instances. ***********************************************************************/ final NodeSet start (Node root) { // we have to support recursion which may occur within // a filter callback if (recursion is 0) { if (freelist.length is 0) freelist.length = 256; freeIndex = 0; } NodeSet set = {this}; auto mark = freeIndex; allocate(root); return set.assign (mark); } /*********************************************************************** This is the meat of XPath support. All of the NodeSet operators exist here, in order to enable call-chaining. Note that some of the axis do double-duty as a filter also. This is just a convenience factor, and doesn't change the underlying mechanisms. ***********************************************************************/ struct NodeSet { private XmlPath host; public Node[] nodes; /// array of selected nodes /*************************************************************** Return a duplicate NodeSet ***************************************************************/ @property NodeSet dup () { NodeSet copy = {host}; copy.nodes = nodes.dup; return copy; } /*************************************************************** Return the number of selected nodes in the set ***************************************************************/ @property size_t count () { return nodes.length; } /*************************************************************** Return a set containing just the first node of the current set ***************************************************************/ NodeSet first () { return nth (0); } /*************************************************************** Return a set containing just the last node of the current set ***************************************************************/ NodeSet last () { auto i = nodes.length; if (i > 0) --i; return nth (i); } /*************************************************************** Return a set containing just the nth node of the current set ***************************************************************/ NodeSet opIndex (size_t i) { return nth (i); } /*************************************************************** Return a set containing just the nth node of the current set ***************************************************************/ NodeSet nth (size_t index) { NodeSet set = {host}; auto mark = host.mark(); if (index < nodes.length) host.allocate (nodes [index]); return set.assign (mark); } /*************************************************************** Return a set containing all child elements of the nodes within this set ***************************************************************/ NodeSet opSlice () { return child(); } /*************************************************************** Return a set containing all child elements of the nodes within this set, which match the given name ***************************************************************/ NodeSet opIndex (const(T[]) name) { return child (name); } /*************************************************************** Return a set containing all parent elements of the nodes within this set, which match the optional name ***************************************************************/ NodeSet parent (const(T[]) name = null) { if (name.ptr) return parent ((Node node){return node.name == name;}); return parent (&always); } /*************************************************************** Return a set containing all data nodes of the nodes within this set, which match the optional value ***************************************************************/ NodeSet data (const(T[]) value = null) { if (value.ptr) return child ((Node node){return node.value == value;}, XmlNodeType.Data); return child (&always, XmlNodeType.Data); } /*************************************************************** Return a set containing all cdata nodes of the nodes within this set, which match the optional value ***************************************************************/ NodeSet cdata (const(T[]) value = null) { if (value.ptr) return child ((Node node){return node.value == value;}, XmlNodeType.CData); return child (&always, XmlNodeType.CData); } /*************************************************************** Return a set containing all attributes of the nodes within this set, which match the optional name ***************************************************************/ NodeSet attribute (const(T[]) name = null) { if (name.ptr) return attribute ((Node node){return node.name == name;}); return attribute (&always); } /*************************************************************** Return a set containing all descendant elements of the nodes within this set, which match the given name ***************************************************************/ NodeSet descendant (const(T[]) name = null) { if (name.ptr) return descendant ((Node node){return node.name == name;}); return descendant (&always); } /*************************************************************** Return a set containing all child elements of the nodes within this set, which match the optional name ***************************************************************/ NodeSet child (const(T[]) name = null) { if (name.ptr) return child ((Node node){return node.name == name;}); return child (&always); } /*************************************************************** Return a set containing all ancestor elements of the nodes within this set, which match the optional name ***************************************************************/ NodeSet ancestor (const(T[]) name = null) { if (name.ptr) return ancestor ((Node node){return node.name == name;}); return ancestor (&always); } /*************************************************************** Return a set containing all prior sibling elements of the nodes within this set, which match the optional name ***************************************************************/ NodeSet prev (const(T[]) name = null) { if (name.ptr) return prev ((Node node){return node.name == name;}); return prev (&always); } /*************************************************************** Return a set containing all subsequent sibling elements of the nodes within this set, which match the optional name ***************************************************************/ NodeSet next (const(T[]) name = null) { if (name.ptr) return next ((Node node){return node.name == name;}); return next (&always); } /*************************************************************** Return a set containing all nodes within this set which pass the filtering test ***************************************************************/ NodeSet filter (scope bool delegate(Node) filter) { NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) test (filter, node); return set.assign (mark); } /*************************************************************** Return a set containing all child nodes of the nodes within this set which pass the filtering test ***************************************************************/ NodeSet child (scope bool delegate(Node) filter, XmlNodeType type = XmlNodeType.Element) { NodeSet set = {host}; auto mark = host.mark(); foreach (parent; nodes) foreach (child; parent.children) if (child.id is type) test (filter, child); return set.assign (mark); } /*************************************************************** Return a set containing all attribute nodes of the nodes within this set which pass the given filtering test ***************************************************************/ NodeSet attribute (scope bool delegate(Node) filter) { NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) foreach (attr; node.attributes) test (filter, attr); return set.assign (mark); } /*************************************************************** Return a set containing all descendant nodes of the nodes within this set, which pass the given filtering test ***************************************************************/ NodeSet descendant (scope bool delegate(Node) filter, XmlNodeType type = XmlNodeType.Element) { void traverse (Node parent) { foreach (child; parent.children) { if (child.id is type) test (filter, child); if (child.firstChild) traverse (child); } } NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) traverse (node); return set.assign (mark); } /*************************************************************** Return a set containing all parent nodes of the nodes within this set which pass the given filtering test ***************************************************************/ NodeSet parent (scope bool delegate(Node) filter) { NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) { auto p = node.parent; if (p && p.id != XmlNodeType.Document && !set.has(p)) { test (filter, p); // continually update our set of nodes, so // that set.has() can see a prior entry. // Ideally we'd avoid invoking test() on // prior nodes, but I don't feel the added // complexity is warranted set.nodes = host.slice (mark); } } return set.assign (mark); } /*************************************************************** Return a set containing all ancestor nodes of the nodes within this set, which pass the given filtering test ***************************************************************/ NodeSet ancestor (scope bool delegate(Node) filter) { NodeSet set = {host}; auto mark = host.mark(); void traverse (Node child) { auto p = child.host; if (p && p.id != XmlNodeType.Document && !set.has(p)) { test (filter, p); // continually update our set of nodes, so // that set.has() can see a prior entry. // Ideally we'd avoid invoking test() on // prior nodes, but I don't feel the added // complexity is warranted set.nodes = host.slice (mark); traverse (p); } } foreach (node; nodes) traverse (node); return set.assign (mark); } /*************************************************************** Return a set containing all following siblings of the ones within this set, which pass the given filtering test ***************************************************************/ NodeSet next (scope bool delegate(Node) filter, XmlNodeType type = XmlNodeType.Element) { NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) { auto p = node.nextSibling; while (p) { if (p.id is type) test (filter, p); p = p.nextSibling; } } return set.assign (mark); } /*************************************************************** Return a set containing all prior sibling nodes of the ones within this set, which pass the given filtering test ***************************************************************/ NodeSet prev (scope bool delegate(Node) filter, XmlNodeType type = XmlNodeType.Element) { NodeSet set = {host}; auto mark = host.mark(); foreach (node; nodes) { auto p = node.prevSibling; while (p) { if (p.id is type) test (filter, p); p = p.prevSibling; } } return set.assign (mark); } /*************************************************************** Traverse the nodes of this set ***************************************************************/ int opApply (scope int delegate(ref Node) dg) { int ret; foreach (node; nodes) if ((ret = dg (node)) != 0) break; return ret; } /*************************************************************** Common predicate ***************************************************************/ private bool always (Node node) { return true; } /*************************************************************** Assign a slice of the freelist to this NodeSet ***************************************************************/ private NodeSet assign (size_t mark) { nodes = host.slice (mark); return this; } /*************************************************************** Execute a filter on the given node. We have to deal with potential query recusion, so we set all kinda crap to recover from that ***************************************************************/ private void test (scope bool delegate(Node) filter, Node node) { auto pop = host.push(); auto add = filter (node); host.pop (pop); if (add) host.allocate (node); } /*************************************************************** We typically need to filter ancestors in order to avoid duplicates, so this is used for those purposes ***************************************************************/ private bool has (Node p) { foreach (node; nodes) if (node is p) return true; return false; } } /*********************************************************************** Return the current freelist index ***********************************************************************/ private size_t mark () { return freeIndex; } /*********************************************************************** Recurse and save the current state ***********************************************************************/ private size_t push () { ++recursion; return freeIndex; } /*********************************************************************** Restore prior state ***********************************************************************/ private void pop (size_t prior) { freeIndex = prior; --recursion; } /*********************************************************************** Return a slice of the freelist ***********************************************************************/ private Node[] slice (size_t mark) { assert (mark <= freeIndex); return freelist [mark .. freeIndex]; } /*********************************************************************** Allocate an entry in the freelist, expanding as necessary ***********************************************************************/ private size_t allocate (Node node) { if (freeIndex >= freelist.length) freelist.length = freelist.length + freelist.length / 2; freelist[freeIndex] = node; return ++freeIndex; } } version (Old) { /******************************************************************************* Specification for an XML serializer *******************************************************************************/ interface IXmlPrinter(T) { public alias Document!(T) Doc; /// the typed document public alias Doc.Node Node; /// generic document node public alias print opCall; /// alias for print method /*********************************************************************** Generate a text representation of the document tree ***********************************************************************/ T[] print (Doc doc); /*********************************************************************** Generate a representation of the given node-subtree ***********************************************************************/ void print (Node root, scope void delegate(T[][]...) emit); } } /******************************************************************************* *******************************************************************************/ debug (Document) { import tango.io.Stdout; import tango.text.xml.DocPrinter; void main() { auto doc = new Document!(char); // attach an xml header doc.header; // attach an element with some attributes, plus // a child element with an attached data value doc.tree.element (null, "root") .attribute (null, "attrib1", "value") .attribute (null, "attrib2", "other") .element (null, "child") .cdata ("some text"); // attach a sibling to the interior elements doc.elements.element (null, "sibling"); bool foo (doc.Node node) { node = node.attributes.name(null, "attrib1"); return node && "value" == node.value; } foreach (node; doc.query.descendant("root").filter(&foo).child) Stdout.formatln(">> {}", node.name); foreach (node; doc.elements.attributes) Stdout.formatln("<< {}", node.name); foreach (node; doc.elements.children) Stdout.formatln("<< {}", node.name); foreach (node; doc.query.descendant.cdata) Stdout.formatln ("{}: {}", node.parent.name, node.value); // emit the result auto printer = new DocPrinter!(char); printer.print (doc, stdout); doc.reset(); } } |