123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
|
/*******************************************************************************
copyright: Copyright (c) 2006 Tango. All rights reserved
license: BSD style: $(LICENSE)
version: Jan 2006: initial release
author: Kris, Nthalk
*******************************************************************************/
module tango.io.stream.Quotes;
private import tango.io.stream.Iterator;
/*******************************************************************************
Iterate over a set of delimited, optionally-quoted, text fields.
Each field is exposed to the client as a slice of the original
content, where the slice is transient. If you need to retain the
exposed content, then you should .dup it appropriately.
The content exposed via an iterator is supposed to be entirely
read-only. All current iterators abide by this rule, but it is
possible a user could mutate the content through a get() slice.
To enforce the desired read-only aspect, the code would have to
introduce redundant copying or the compiler would have to support
read-only arrays.
Usage:
---
auto f = new File ("my.csv");
auto l = new Lines (f);
auto b = new Array (0);
auto q = new Quotes!(char)(",", b);
foreach (line; l)
{
b.assign (line);
foreach (index, field; q)
Stdout (index, field);
Stdout.newline;
}
---
See Iterator, Lines, Patterns, Delimiters.
*******************************************************************************/
class Quotes(T) : Iterator!(T)
{
        // the set of characters which terminate a field
        private const(T)[] delim;

        /***********************************************************************

                Construct an iterator over delimited, optionally-quoted
                fields.

                Params:
                delim  = each element is treated as a field delimiter
                stream = optional input to iterate over; handed to the
                         Iterator base constructor as-is

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                Locate the end of the next field within the given data.

                A field ends at the first delimiter found outside of a
                quoted section. A quoted section is opened by either '"'
                or '\'' and closed by the same character, unless that
                character is preceded by an odd number of consecutive
                backslashes (in which case it is considered escaped).
                Backslashes are only tracked inside a quoted section.

                The quote characters themselves remain part of the
                exposed field.

                Returns the result of found() when a delimiter is
                located, and the result of notFound() otherwise. Those
                helpers, along with has() and set(), are not defined in
                this class (presumably inherited from Iterator).

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                // view the raw bytes as an array of T
                auto text = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];

                T   opener  = 0;        // active quote char, or 0 when unquoted
                int slashes = 0;        // run-length of backslashes just seen

                foreach (pos, ch; text)
                        {
                        if (! opener)
                           {
                           // unquoted: either open a quote or test for a delimiter
                           if (ch is '"' || ch is '\'')
                               opener = ch;
                           else
                              if (has (delim, ch))
                                  return found (set (text.ptr, 0, pos));
                           continue;
                           }

                        // quoted: accumulate consecutive backslashes
                        if (ch is '\\')
                           {
                           ++slashes;
                           continue;
                           }

                        // a matching quote closes the section only when unescaped
                        if (ch is opener && (slashes % 2) is 0)
                            opener = 0;

                        // any non-backslash char terminates the backslash run
                        slashes = 0;
                        }

                return notFound();
        }
}
/*******************************************************************************
*******************************************************************************/
debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        /***********************************************************************

                Join a list of fields with ',' and verify that Quotes
                hands back exactly those fields, in order. The list
                covers empty fields, quoted delimiters, escaped quotes
                within a quoted section, and single-quoted content.

                Mismatches are reported via Stdout for diagnosis, and
                then cause the unittest to fail via assert (previously
                they were printed only, so the test could never fail).

        ***********************************************************************/

        unittest
        {
                const(char)[][] expected =
                [
                `0`
                ,``
                ,``
                ,`"3"`
                ,`""`
                ,`5`
                ,`",6"`
                ,`"7,"`
                ,`8`
                ,`"9,\\\","`
                ,`10`
                ,`',11",'`
                ,`"12"`
                ];

                auto b = new Array (expected.join (","));

                size_t seen   = 0;      // number of fields the iterator produced
                bool   passed = true;   // cleared upon the first discrepancy

                foreach (i, f; new Quotes!(char)(",", b))
                        {
                        ++seen;
                        if (i >= expected.length)
                           {
                           Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                           passed = false;
                           }
                        else
                           if (f != expected[i])
                              {
                              Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
                              passed = false;
                              }
                        }

                assert (passed, "Quotes: field content mismatch");

                // also catches the case where trailing fields were dropped
                assert (seen == expected.length, "Quotes: unexpected number of fields");
        }
}
|