/*******************************************************************************

        copyright:      Copyright (c) 2006 Tango. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Jan 2006: initial release

        author:         Kris, Nthalk

*******************************************************************************/

module tango.io.stream.Quotes;

private import tango.io.stream.Iterator;

/*******************************************************************************

        Iterate over a set of delimited, optionally-quoted, text fields.

        Each field is exposed to the client as a slice of the original
        content, where the slice is transient. If you need to retain the
        exposed content, then you should .dup it appropriately.

        The content exposed via an iterator is supposed to be entirely
        read-only. All current iterators abide by this rule, but it is
        possible a user could mutate the content through a get() slice.
        To enforce the desired read-only aspect, the code would have to
        introduce redundant copying or the compiler would have to support
        read-only arrays.

        Quoting rules, as implemented by scan():
        $(UL
          $(LI a double quote or a single quote opens a quote block
               wherever it appears in a field, not only at the start)
          $(LI inside a quote block delimiters are ignored)
          $(LI the block is closed by the same quote character that
               opened it, unless that character is preceded by an odd
               number of consecutive backslashes)
          $(LI the quote characters and backslashes remain part of the
               exposed field; nothing is stripped or unescaped)
        )

        Usage:
        ---
        auto f = new File ("my.csv");
        auto l = new Lines (f);
        auto b = new Array (0);
        auto q = new Quotes!(char)(",", b);

        foreach (line; l)
        {
                b.assign (line);
                foreach (index, field; q)
                         Stdout (index, field);
                Stdout.newline;
        }
        ---

        See Iterator, Lines, Patterns, Delimiters.

*******************************************************************************/

class Quotes(T) : Iterator!(T)
{
        // Delimiter characters; each scanned element is tested against
        // this array via has().
        private const(T)[] delim;

        /***********************************************************************

                Construct a quote-aware field iterator.

                Params:
                delim  = the delimiter characters. scan() tests each
                         content element against this array with has(),
                         so it appears to act as a set of single-element
                         delimiters rather than one multi-element
                         pattern (has() is not defined in this file;
                         confirm against Iterator).
                stream = optional input to iterate over; forwarded
                         unchanged to the Iterator constructor. May be
                         null.

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                This splits on delimiters only. If there is a quote, it
                suspends delimiter splitting until the quote is finished.

                The quote and escape state is local to each call, so
                every invocation examines the supplied data from its
                first element.

                Params:
                data = the raw content to examine; it is reinterpreted
                       as an array of T, and any trailing bytes that do
                       not make up a whole T are ignored.

                Returns:
                the result of found() when an unquoted delimiter is
                located at index i (after set (content.ptr, 0, i)), or
                the result of notFound() otherwise. Both helpers, along
                with set() and has(), are not defined in this file and
                presumably come from Iterator.

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                // the active opening quote character, or 0 when outside a quote
                T   quote = 0;

                // consecutive backslashes seen immediately before the current
                // element while inside a quote block
                int escape = 0;

                auto content = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];

                foreach (i, c; content)
                {
                        // within a quote block?
                        if (quote)
                        {
                                if (c is '\\')
                                    ++escape;
                                else
                                {
                                        // matched the initial quote char? An odd
                                        // backslash count means it is escaped
                                        if (c is quote && escape % 2 is 0)
                                            quote = 0;

                                        // any non-backslash ends the backslash run
                                        escape = 0;
                                }
                        }
                        // begin a quote block?
                        else if (c is '"' || c is '\'')
                                 quote = c;
                        // unquoted delimiter: the field is content [0 .. i]
                        else if (has (delim, c))
                                 return found (set (content.ptr, 0, i));
                }

                return notFound();
        }
}


/*******************************************************************************

        Splits a comma-joined list of sample fields (empty fields, quoted
        fields containing the delimiter, escaped quotes, and mixed quote
        characters) and reports any field that differs from the original
        list. Mismatches are written to Stdout rather than asserted.

*******************************************************************************/

debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        unittest
        {
                const(char)[][] expected =
                [
                        `0`
                        ,``
                        ,``
                        ,`"3"`
                        ,`""`
                        ,`5`
                        ,`",6"`
                        ,`"7,"`
                        ,`8`
                        ,`"9,\\\","`
                        ,`10`
                        ,`',11",'`
                        ,`"12"`
                ];

                auto b = new Array (expected.join (","));

                foreach (i, f; new Quotes!(char)(",", b))
                         if (i >= expected.length)
                             Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                         else
                            if (f != expected[i])
                                Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
        }
}