/*******************************************************************************

        Copyright: Copyright (C) 2008 Kris Bell.  All rights reserved.

        License:   BSD style: $(LICENSE)

        version:   Aug 2008: Initial release

        Authors:   Kris

*******************************************************************************/

module tango.text.xml.DocEntity;

private import Util = tango.text.Util;

/******************************************************************************

        Convert XML entity patterns to normal characters

        ---
        &amp;  => &
        &quot; => "
        &apos; => '
        &gt;   => >
        &lt;   => <
        ---

        Only the five entities listed above are recognised. Any other
        '&' (including one at the very end of the input) is copied
        through unchanged.

        Params:
        src = the text to decode
        dst = optional workspace; it is resized when shorter than src

        Returns:
        a slice of dst holding the decoded text. The input is always
        copied into dst, even when it contains no '&' at all.

******************************************************************************/

T[] fromEntity (T) (const(T)[] src, T[] dst = null)
{
        ptrdiff_t delta;
        auto s = src.ptr;
        auto len = src.length;

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, '&', len)) < len)
           {
           // make some room if not enough provided; decoding never
           // produces more elements than it consumes
           if (dst.length < src.length)
               dst.length = src.length;
           auto d = dst.ptr;

           // copy segments over, a chunk at a time
           do {
              d [0 .. delta] = s [0 .. delta];
              len -= delta;
              s += delta;
              d += delta;

              // translate entity; token is the number of source
              // elements consumed, or zero when nothing matched
              auto token = 0;

              // s[0] is the '&'. Only look at s[1] when it is inside
              // the input: pointer indexing is not bounds checked
              if (len > 1)
                 {
                 switch (s[1])
                        {
                        case 'a':
                             if (len > 4 && s[1..5] == "amp;")
                                {
                                *d++ = '&';
                                token = 5;
                                }
                             else
                                if (len > 5 && s[1..6] == "apos;")
                                   {
                                   *d++ = '\'';
                                   token = 6;
                                   }
                             break;

                        case 'g':
                             if (len > 3 && s[1..4] == "gt;")
                                {
                                *d++ = '>';
                                token = 4;
                                }
                             break;

                        case 'l':
                             if (len > 3 && s[1..4] == "lt;")
                                {
                                *d++ = '<';
                                token = 4;
                                }
                             break;

                        case 'q':
                             if (len > 5 && s[1..6] == "quot;")
                                {
                                *d++ = '"';
                                token = 6;
                                }
                             break;

                        default:
                             break;
                        }
                 }

              // not a known entity: keep the '&' as a literal
              if (token is 0)
                 {
                 *d++ = '&';
                 token = 1;
                 }

              s += token;
              len -= token;
              } while ((delta = Util.indexOf (s, '&', len)) < len);

           // copy tail too
           d [0 .. len] = s [0 .. len];
           return dst [0 .. (d + len) - dst.ptr];
           }

        // nothing to translate: hand back a plain copy
        if (dst.length < src.length)
            dst.length = src.length;
        dst [0 .. src.length] = src;
        return dst [0 .. src.length];
}


/******************************************************************************

        Convert XML entity patterns to normal characters

        ---
        &amp;  => &
        &quot; => "
        &apos; => '
        &gt;   => >
        &lt;   => <
        ---

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. The delegate is called
        once per literal segment (which may be empty) and once per
        translated entity, in input order. When the input contains no
        '&' the delegate is called exactly once with src itself.

        Params:
        src  = the text to decode
        emit = sink for the decoded pieces

******************************************************************************/

void fromEntity (T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        ptrdiff_t delta;
        auto s = src.ptr;
        auto len = src.length;

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, '&', len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              emit (s [0 .. delta]);
              len -= delta;
              s += delta;

              // translate entity; token is the number of source
              // elements consumed, or zero when nothing matched
              auto token = 0;

              // s[0] is the '&'. Only look at s[1] when it is inside
              // the input: pointer indexing is not bounds checked
              if (len > 1)
                 {
                 switch (s[1])
                        {
                        case 'a':
                             if (len > 4 && s[1..5] == "amp;")
                                {
                                emit ("&");
                                token = 5;
                                }
                             else
                                if (len > 5 && s[1..6] == "apos;")
                                   {
                                   emit ("'");
                                   token = 6;
                                   }
                             break;

                        case 'g':
                             if (len > 3 && s[1..4] == "gt;")
                                {
                                emit (">");
                                token = 4;
                                }
                             break;

                        case 'l':
                             if (len > 3 && s[1..4] == "lt;")
                                {
                                emit ("<");
                                token = 4;
                                }
                             break;

                        case 'q':
                             if (len > 5 && s[1..6] == "quot;")
                                {
                                emit ("\"");
                                token = 6;
                                }
                             break;

                        default:
                             break;
                        }
                 }

              // not a known entity: keep the '&' as a literal
              if (token is 0)
                 {
                 emit ("&");
                 token = 1;
                 }

              s += token;
              len -= token;
              } while ((delta = Util.indexOf (s, '&', len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           emit (src);
}


/******************************************************************************

        Convert reserved chars to entities. For example:

        ---
        "  => &quot;
        >  => &gt;
        <  => &lt;
        &  => &amp;
        '  => &apos;
        ---

        A slice of the provided output buffer is returned. The output
        buffer should be sufficiently large to accommodate the converted
        output, or it will be resized (and so possibly reallocated from
        the heap) instead.

        Params:
        src = the text to encode
        dst = optional workspace

        Returns:
        a slice of dst holding the encoded text. The input is always
        copied into dst, even when it has no reserved characters.

******************************************************************************/

T[] toEntity(T) (const(T)[] src, T[] dst = null)
{
        const(T)[] entity;
        auto s = src.ptr;       // scan position
        auto t = s;             // start of the pending literal run
        auto e = s + src.length;

        // write position within dst; size_t so that it matches the
        // array length type instead of being truncated to an int
        size_t index = 0;

        while (s < e)
               switch (*s)
                      {
                      case '"':
                           entity = "&quot;";
                           goto common;

                      case '>':
                           entity = "&gt;";
                           goto common;

                      case '<':
                           entity = "&lt;";
                           goto common;

                      case '&':
                           entity = "&amp;";
                           goto common;

                      case '\'':
                           entity = "&apos;";
                           goto common;

                      common:
                           // flush the literal run preceding this
                           // character, then append its entity
                           auto len = s - t;
                           if (dst.length <= index + len + entity.length)
                               dst.length = (dst.length + len + entity.length) + dst.length / 2;

                           dst [index .. index + len] = t [0 .. len];
                           index += len;

                           dst [index .. index + entity.length] = entity;
                           index += entity.length;
                           t = ++s;
                           break;

                      default:
                           ++s;
                           break;
                      }

        // did we change anything? index is non-zero as soon as one
        // entity has been written
        if (index)
           {
           // copy tail too
           auto len = e - t;
           if (dst.length <= index + len)
               dst.length = index + len;

           dst [index .. index + len] = t [0 .. len];
           return dst [0 .. index + len];
           }

        // nothing to translate: hand back a plain copy
        if (dst.length < src.length)
            dst.length = src.length;
        dst [0 .. src.length] = src;
        return dst [0 .. src.length];
}


/******************************************************************************

        Convert reserved chars to entities. For example:

        ---
        "  => &quot;
        >  => &gt;
        <  => &lt;
        &  => &amp;
        '  => &apos;
        ---

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. Non-empty literal runs
        and entities are emitted in input order, followed by the
        (possibly empty) tail. When the input has no reserved characters
        the delegate is called exactly once with src itself.

        Params:
        src  = the text to encode
        emit = sink for the encoded pieces

******************************************************************************/

void toEntity(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        const(T)[] entity;
        auto s = src.ptr;       // scan position
        auto t = s;             // start of the pending literal run
        auto e = s + src.length;

        while (s < e)
               switch (*s)
                      {
                      case '"':
                           entity = "&quot;";
                           goto common;

                      case '>':
                           entity = "&gt;";
                           goto common;

                      case '<':
                           entity = "&lt;";
                           goto common;

                      case '&':
                           entity = "&amp;";
                           goto common;

                      case '\'':
                           entity = "&apos;";
                           goto common;

                      common:
                           if (s - t > 0)
                               emit (t [0 .. s - t]);
                           emit (entity);
                           t = ++s;
                           break;

                      default:
                           ++s;
                           break;
                      }

        // did we change anything? entity is only ever assigned when a
        // reserved character was seen. Copy tail also
        if (entity.length)
            emit (t [0 .. e - t]);
        else
           emit (src);
}


/*******************************************************************************

        Self test; build with -debug=DocEntity

*******************************************************************************/

debug (DocEntity)
{
        import tango.io.Console;

        void main()
        {
                auto s = fromEntity ("&amp;");
                assert (s == "&");
                s = fromEntity ("&quot;");
                assert (s == "\"");
                s = fromEntity ("&apos;");
                assert (s == "'");
                s = fromEntity ("&gt;");
                assert (s == ">");
                s = fromEntity ("&lt;");
                assert (s == "<");
                s = fromEntity ("&lt;&amp;&apos;");
                assert (s == "<&'");
                s = fromEntity ("*&lt;&amp;&apos;*");
                assert (s == "*<&'*");

                assert (fromEntity ("abc") == "abc");
                assert (fromEntity ("abc&") == "abc&");
                assert (fromEntity ("abc&lt;") == "abc<");
                assert (fromEntity ("abc&gt;goo") == "abc>goo");
                assert (fromEntity ("&amp;") == "&");
                assert (fromEntity ("&quot;&apos;") == "\"'");
                assert (fromEntity ("&q&s") == "&q&s");

                // a trailing '&' in a slice that is not NUL terminated
                // must not cause a read beyond the end of the slice
                assert (fromEntity ("abc&amp;"[0 .. 4]) == "abc&");
                assert (fromEntity ("&lt;"[0 .. 1]) == "&");

                auto d = toEntity (">");
                assert (d == "&gt;");
                d = toEntity ("<");
                assert (d == "&lt;");
                d = toEntity ("&");
                assert (d == "&amp;");
                d = toEntity ("'");
                assert (d == "&apos;");
                d = toEntity ("\"");
                assert (d == "&quot;");

                d = toEntity ("^^>*>*");
                assert (d == "^^&gt;*&gt;*");

                // delegate variants must agree with the buffer variants
                char[] acc;
                fromEntity!(char) ("a&lt;b&", (const(char)[] x) {acc ~= x;});
                assert (acc == "a<b&");

                acc = null;
                toEntity!(char) ("a<b&", (const(char)[] x) {acc ~= x;});
                assert (acc == "a&lt;b&amp;");
        }
}