| //Written in the D programming language |
| |
| /** |
| * Implements functionality to read Comma Separated Values and its variants |
| * from an input range of $(D dchar). |
| * |
| * Comma Separated Values provide a simple means to transfer and store |
| * tabular data. It has been common for programs to use their own |
| * variant of the CSV format. This parser will loosely follow the |
| * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere |
| * to the following criteria (differences from RFC-4180 in parentheses): |
| * |
| * $(UL |
| * $(LI A record is separated by a new line (CRLF,LF,CR)) |
| * $(LI A final record may end with a new line) |
| * $(LI A header may be provided as the first record in input) |
| * $(LI A record has fields separated by a comma (customizable)) |
| * $(LI A field containing new lines, commas, or double quotes |
| * should be enclosed in double quotes (customizable)) |
| * $(LI Double quotes in a field are escaped with a double quote) |
| * $(LI Each record should contain the same number of fields) |
| * ) |
| * |
| * Example: |
| * |
| * ------- |
| * import std.algorithm; |
| * import std.array; |
| * import std.csv; |
| * import std.stdio; |
| * import std.typecons; |
| * |
| * void main() |
| * { |
| * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"; |
| * |
| * foreach (record; csvReader!(Tuple!(string, string, int))(text)) |
| * { |
| * writefln("%s works as a %s and earns $%d per year", |
| * record[0], record[1], record[2]); |
| * } |
| * |
| * // To read the same string from the file "filename.csv": |
| * |
| * auto file = File("filename.csv", "r"); |
| * foreach (record; |
| * file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int))) |
| * { |
| * writefln("%s works as a %s and earns $%d per year", |
| * record[0], record[1], record[2]); |
| * } |
| * } |
| * ------- |
| * |
| * When an input contains a header the $(D Contents) can be specified as an |
| * associative array. Pass null as the header to signify that a header is present. |
| * |
| * ------- |
| * auto text = "Name,Occupation,Salary\r" ~ |
| * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"; |
| * |
| * foreach (record; csvReader!(string[string]) |
| * (text, null)) |
| * { |
| * writefln("%s works as a %s and earns $%s per year.", |
| * record["Name"], record["Occupation"], |
| * record["Salary"]); |
| * } |
| * ------- |
| * |
| * This module allows content to be iterated by record stored in a struct, |
| * class, associative array, or as a range of fields. Upon detection of an |
| * error a CSVException is thrown (can be disabled). csvNextToken has been |
| * made public to allow for attempted recovery. |
| * |
| * Disabling exceptions will lift many restrictions specified above. A quote |
| * can appear in a field if the field was not quoted. If in a quoted field any |
| * quote by itself, not at the end of a field, will end processing for that |
| * field. The field is ended when there is no input, even if the quote was not |
| * closed. |
| * |
| * See_Also: |
| * $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia |
| * Comma-separated values) |
| * |
| * Copyright: Copyright 2011 |
| * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). |
| * Authors: Jesse Phillips |
| * Source: $(PHOBOSSRC std/_csv.d) |
| */ |
| module std.csv; |
| |
| import std.conv; |
| import std.exception; // basicExceptionCtors |
| import std.range.primitives; |
| import std.traits; |
| |
| /** |
| * Exception containing the row and column for when an exception was thrown. |
| * |
| * Numbering of both row and col starts at one and corresponds to the location |
| * in the file rather than any specified header. Special consideration should |
| * be made when there is a failure to match the header; see $(LREF |
| * HeaderMismatchException) for details. |
| * |
| * When performing type conversions, $(REF ConvException, std,conv) is stored in |
| * the $(D next) field. |
| */ |
| class CSVException : Exception |
| { |
|     /// Location tied to the error; both stay zero when a constructor without row/col is used. |
|     size_t row, col; |
| |
|     // FIXME: Use std.exception.basicExceptionCtors here once bug #11500 is fixed |
| |
|     // Message-first form: the chained throwable, if any, is the last argument. |
|     this(string msg, string file = __FILE__, size_t line = __LINE__, |
|          Throwable next = null) @nogc @safe pure nothrow |
|     { |
|         super(msg, file, line, next); |
|     } |
| |
|     // Cause-second form: lets callers write `new CSVException(msg, cause)`. |
|     this(string msg, Throwable next, string file = __FILE__, |
|          size_t line = __LINE__) @nogc @safe pure nothrow |
|     { |
|         super(msg, file, line, next); |
|     } |
| |
|     // Located form: additionally records the row and column passed by the caller. |
|     this(string msg, size_t row, size_t col, Throwable next = null, |
|          string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow |
|     { |
|         super(msg, next, file, line); |
|         this.col = col; |
|         this.row = row; |
|     } |
| |
|     // Renders as "(Row: R, Col: C) message". |
|     override string toString() @safe pure const |
|     { |
|         string location = "(Row: " ~ row.to!string ~ ", Col: " ~ col.to!string ~ ") "; |
|         return location ~ msg; |
|     } |
| } |
| |
| @safe pure unittest |
| { |
|     import std.string; |
| |
|     // A cause given as the second argument is reachable through `next`. |
|     auto cause = new Exception("Foobar"); |
|     auto chained = new CSVException("args", cause); |
|     assert(chained.next is cause); |
| |
|     // Row and column handed to the constructor are stored unchanged... |
|     size_t wantRow = 13; |
|     size_t wantCol = 37; |
|     auto located = new CSVException("argv", wantRow, wantCol); |
|     assert(located.row == wantRow); |
|     assert(located.col == wantCol); |
| |
|     // ...and both numbers appear in the string form. |
|     string rendered = located.toString(); |
|     assert(rendered.indexOf("13") != -1); |
|     assert(rendered.indexOf("37") != -1); |
| } |
| |
| /** |
| * Exception thrown when a Token is identified to not be completed: a quote is |
| * found in an unquoted field, data continues after a closing quote, or the |
| * quoted field was not closed before data was empty. |
| */ |
| class IncompleteCellException : CSVException |
| { |
| /** |
| * Data pulled from input before finding a problem. |
| * |
| * This field is populated when using $(LREF csvReader) |
| * but not by $(LREF csvNextToken) as this data will have |
| * already been fed to the output range. |
| */ |
| dstring partialData; |
| |
| // Constructors are supplied by the std.exception mixin rather than written |
| // out by hand. |
| mixin basicExceptionCtors; |
| } |
| |
| @safe pure unittest |
| { |
|     // The (msg, next) constructor must chain the cause. |
|     auto cause = new Exception("Foobar"); |
|     auto incomplete = new IncompleteCellException("args", cause); |
|     assert(cause is incomplete.next); |
| } |
| |
| /** |
| * Exception thrown under different conditions based on the type of $(D |
| * Contents). |
| * |
| * Structure, Class, and Associative Array |
| * $(UL |
| * $(LI When a header is provided but a matching column is not found) |
| * ) |
| * |
| * Other |
| * $(UL |
| * $(LI When a header is provided but a matching column is not found) |
| * $(LI Order did not match that found in the input) |
| * ) |
| * |
| * Since a row and column is not meaningful when a column specified by the |
| * header is not found in the data, both row and col will be zero. Otherwise |
| * row is always one and col is the first instance found in header that |
| * occurred before the previous starting at one. |
| */ |
| class HeaderMismatchException : CSVException |
| { |
| // Adds no members of its own; row and col are inherited from CSVException |
| // and the constructors come from the std.exception mixin. |
| mixin basicExceptionCtors; |
| } |
| |
| @safe pure unittest |
| { |
|     // The (msg, next) constructor must chain the cause. |
|     auto cause = new Exception("Foobar"); |
|     auto mismatch = new HeaderMismatchException("args", cause); |
|     assert(cause is mismatch.next); |
| } |
| |
| /** |
| * Determines the behavior for when an error is detected. |
| * |
| * Disabling exceptions will follow these rules: |
| * $(UL |
| * $(LI A quote can appear in a field if the field was not quoted.) |
| * $(LI If in a quoted field any quote by itself, not at the end of a |
| * field, will end processing for that field.) |
| * $(LI The field is ended when there is no input, even if the quote was |
| * not closed.) |
| * $(LI If the given header does not match the order in the input, the |
| * content will return as it is found in the input.) |
| * $(LI If the given header contains columns not found in the input they |
| * will be ignored.) |
| * ) |
| */ |
| enum Malformed |
| { |
| ignore, /// No exceptions are thrown due to incorrect CSV; input is read on a best-effort basis. |
| throwException /// Use exceptions when input has incorrect CSV (the default ErrorLevel of csvReader). |
| } |
| |
| /** |
| * Returns an input range for iterating over records found in $(D |
| * input). |
| * |
| * The $(D Contents) of the input can be provided if all the records are the |
| * same type such as all integer data: |
| * |
| * ------- |
| * string str = `76,26,22`; |
| * int[] ans = [76,26,22]; |
| * auto records = csvReader!int(str); |
| * |
| * foreach (record; records) |
| * { |
| * assert(equal(record, ans)); |
| * } |
| * ------- |
| * |
| * Example using a struct with modified delimiter: |
| * |
| * ------- |
| * string str = "Hello;65;63.63\nWorld;123;3673.562"; |
| * struct Layout |
| * { |
| * string name; |
| * int value; |
| * double other; |
| * } |
| * |
| * auto records = csvReader!Layout(str,';'); |
| * |
| * foreach (record; records) |
| * { |
| * writeln(record.name); |
| * writeln(record.value); |
| * writeln(record.other); |
| * } |
| * ------- |
| * |
| * Specifying $(D ErrorLevel) as Malformed.ignore will lift restrictions |
| * on the format. This example shows that an exception is not thrown when |
| * finding a quote in a field not quoted. |
| * |
| * ------- |
| * string str = "A \" is now part of the data"; |
| * auto records = csvReader!(string,Malformed.ignore)(str); |
| * auto record = records.front; |
| * |
| * assert(record.front == str); |
| * ------- |
| * |
| * Returns: |
| * An input range R as defined by |
| * $(REF isInputRange, std,range,primitives). When $(D Contents) is a |
| * struct, class, or an associative array, the element type of R is |
| * $(D Contents), otherwise the element type of R is itself a range with |
| * element type $(D Contents). |
| * |
| * Throws: |
| * $(LREF CSVException) When a quote is found in an unquoted field, |
| * data continues after a closing quote, the quoted field was not |
| * closed before data was empty, a conversion failed, or when the row's |
| * length does not match the previous length. |
| * |
| * $(LREF HeaderMismatchException) when a header is provided but a |
| * matching column is not found or the order did not match that found in |
| * the input. Read the exception documentation for specific details of |
| * when the exception is thrown for different types of $(D Contents). |
| */ |
| auto csvReader(Contents = string, |
|                Malformed ErrorLevel = Malformed.throwException, |
|                Range, Separator = char) |
|               (Range input, |
|                Separator delimiter = ',', Separator quote = '"') |
| if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar) |
|     && isSomeChar!(Separator) |
|     && !is(Contents T : T[U], U : string)) |
| { |
|     // The reader is instantiated with the range's unqualified element type as |
|     // its separator type; string[] fills the header slot, which this |
|     // header-less overload never passes a value for. |
|     alias Records = CsvReader!(Contents, ErrorLevel, Range, |
|                                Unqual!(ElementType!Range), string[]); |
|     return Records(input, delimiter, quote); |
| } |
| |
| /** |
| * An optional $(D header) can be provided. The first record will be read in |
| * as the header. If $(D Contents) is a struct then the header provided is |
| * expected to correspond to the fields in the struct. When $(D Contents) is |
| * not a type which can contain the entire record, the $(D header) must be |
| * provided in the same order as the input or an exception is thrown. |
| * |
| * Read only column "b": |
| * |
| * ------- |
| * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; |
| * auto records = csvReader!int(str, ["b"]); |
| * |
| * auto ans = [[65],[123]]; |
| * foreach (record; records) |
| * { |
| * assert(equal(record, ans.front)); |
| * ans.popFront(); |
| * } |
| * ------- |
| * |
| * Read from header of different order: |
| * |
| * ------- |
| * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; |
| * struct Layout |
| * { |
| * int value; |
| * double other; |
| * string name; |
| * } |
| * |
| * auto records = csvReader!Layout(str, ["b","c","a"]); |
| * ------- |
| * |
| * The header can also be left empty if the input contains a header but |
| * all columns should be iterated. The header from the input can always |
| * be accessed from the header field. |
| * |
| * ------- |
| * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; |
| * auto records = csvReader(str, null); |
| * |
| * assert(records.header == ["a","b","c"]); |
| * ------- |
| * |
| * Returns: |
| * An input range R as defined by |
| * $(REF isInputRange, std,range,primitives). When $(D Contents) is a |
| * struct, class, or an associative array, the element type of R is |
| * $(D Contents), otherwise the element type of R is itself a range with |
| * element type $(D Contents). |
| * |
| * The returned range provides a header field for accessing the header |
| * from the input in array form. |
| * |
| * ------- |
| * string str = "a,b,c\nHello,65,63.63"; |
| * auto records = csvReader(str, ["a"]); |
| * |
| * assert(records.header == ["a","b","c"]); |
| * ------- |
| * |
| * Throws: |
| * $(LREF CSVException) When a quote is found in an unquoted field, |
| * data continues after a closing quote, the quoted field was not |
| * closed before data was empty, a conversion failed, or when the row's |
| * length does not match the previous length. |
| * |
| * $(LREF HeaderMismatchException) when a header is provided but a |
| * matching column is not found or the order did not match that found in |
| * the input. Read the exception documentation for specific details of |
| * when the exception is thrown for different types of $(D Contents). |
| */ |
| auto csvReader(Contents = string, |
|                Malformed ErrorLevel = Malformed.throwException, |
|                Range, Header, Separator = char) |
|               (Range input, Header header, |
|                Separator delimiter = ',', Separator quote = '"') |
| if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar) |
|     && isSomeChar!(Separator) |
|     && isForwardRange!Header |
|     && isSomeString!(ElementType!Header)) |
| { |
|     // Same reader as the header-less overload, but parameterised on the |
|     // caller's header range type and built through the header-taking constructor. |
|     alias Records = CsvReader!(Contents, ErrorLevel, Range, |
|                                Unqual!(ElementType!Range), Header); |
|     return Records(input, header, delimiter, quote); |
| } |
| |
| /// |
| auto csvReader(Contents = string, |
|                Malformed ErrorLevel = Malformed.throwException, |
|                Range, Header, Separator = char) |
|               (Range input, Header header, |
|                Separator delimiter = ',', Separator quote = '"') |
| if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar) |
|     && isSomeChar!(Separator) |
|     && is(Header : typeof(null))) |
| { |
|     // A literal null carries no column names, so the `header` argument is not |
|     // used; a null string[] is forwarded to the header-taking constructor instead. |
|     string[] noNames = null; |
|     alias Records = CsvReader!(Contents, ErrorLevel, Range, |
|                                Unqual!(ElementType!Range), string[]); |
|     return Records(input, noNames, delimiter, quote); |
| } |
| |
| // Test standard iteration over input. |
| @safe pure unittest |
| { |
|     // Three records of two fields each; the quoted fields carry escaped |
|     // quotes and an embedded newline, and one field is empty. |
|     string csv = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six"; |
| |
|     size_t cells; |
|     foreach (row; csvReader(csv)) |
|         foreach (field; row) |
|             ++cells; |
| |
|     assert(cells == 6); |
| } |
| |
| // Test newline on last record |
| @safe pure unittest |
| { |
|     // Two records followed by a final line break: stepping past both must |
|     // leave the reader empty rather than producing a third record. |
|     auto rows = csvReader("one,two\nthree,four\n"); |
|     foreach (step; 0 .. 2) |
|         rows.popFront(); |
|     assert(rows.empty); |
| } |
| |
| // Test shorter row length |
| @safe pure unittest |
| { |
|     // With Malformed.ignore, rows that have fewer fields than the struct |
|     // leave the remaining members at their default value. |
|     wstring str = "one,1\ntwo\nthree"w; |
|     struct Layout |
|     { |
|         string name; |
|         int value; |
|     } |
| |
|     Layout[3] ans; |
|     ans[0].name = "one"; |
|     ans[0].value = 1; |
|     ans[1].name = "two"; |
|     ans[1].value = 0; |
|     ans[2].name = "three"; |
|     ans[2].value = 0; |
| |
|     auto records = csvReader!(Layout,Malformed.ignore)(str); |
| |
|     int count; |
|     foreach (record; records) |
|     { |
|         assert(ans[count].name == record.name); |
|         assert(ans[count].value == record.value); |
|         count++; |
|     } |
|     // Without this check the test would pass even if no record was produced. |
|     assert(count == ans.length); |
| } |
| |
| // Test shorter row length exception |
| @safe pure unittest |
| { |
|     import std.exception; |
| |
|     struct Triple |
|     { |
|         string a,b,c; |
|     } |
| |
|     // Each input contains a row whose field count differs from the first row. |
|     auto inputs = ["one,1\ntwo", |
|                    "one\ntwo,2,二\nthree,3,三", |
|                    "one\ntwo,2\nthree,3", |
|                    "one,1\ntwo\nthree,3"]; |
| |
|     foreach (input; inputs) |
|     { |
|         auto rows = csvReader!Triple(input); |
|         assertThrown!CSVException((){ foreach (row; rows) { } }()); |
|     } |
| } |
| |
| |
| // Test structure conversion interface with unicode. |
| @safe pure unittest |
| { |
|     import std.math : abs; |
| |
|     struct Layout |
|     { |
|         string name; |
|         int value; |
|         double other; |
|     } |
| |
|     // The first field starts with a code point outside the BMP, supplied as UTF-16. |
|     wstring str = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w; |
| |
|     Layout[2] expected; |
|     expected[0].name = "\U00010143Hello"; |
|     expected[0].value = 65; |
|     expected[0].other = 63.63; |
|     expected[1].name = "World"; |
|     expected[1].value = 123; |
|     expected[1].other = 3673.562; |
| |
|     size_t seen; |
|     foreach (record; csvReader!Layout(str)) |
|     { |
|         auto want = expected[seen]; |
|         assert(want.name == record.name); |
|         assert(want.value == record.value); |
|         assert(abs(want.other - record.other) < 0.00001); |
|         ++seen; |
|     } |
|     assert(seen == expected.length); |
| } |
| |
| // Test input conversion interface |
| @safe pure unittest |
| { |
|     import std.algorithm; |
| |
|     // Each record is a range of fields already converted to int. |
|     foreach (row; csvReader!int(`76,26,22`)) |
|         assert(row.equal([76, 26, 22])); |
| } |
| |
| // Test struct & header interface and same unicode |
| @safe unittest |
| { |
|     import std.math : abs; |
| |
|     struct Layout |
|     { |
|         int value; |
|         double other; |
|         string name; |
|     } |
| |
|     string str = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562"; |
| |
|     Layout[2] expected; |
|     expected[0].name = "Hello"; |
|     expected[0].value = 65; |
|     expected[0].other = 63.63; |
|     expected[1].name = "➊➋➂❹"; |
|     expected[1].value = 123; |
|     expected[1].other = 3673.562; |
| |
|     // The header list names the input column feeding each struct member, in |
|     // member order: value <- "b", other <- "c", name <- "a". |
|     size_t seen; |
|     foreach (record; csvReader!Layout(str, ["b","c","a"])) |
|     { |
|         auto want = expected[seen]; |
|         assert(want.name == record.name); |
|         assert(want.value == record.value); |
|         assert(abs(want.other - record.other) < 0.00001); |
|         ++seen; |
|     } |
|     assert(seen == expected.length); |
| } |
| |
| // Test header interface |
| @safe unittest |
| { |
|     import std.algorithm; |
| |
|     string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; |
| |
|     // Selecting a single column yields one converted value per record. |
|     auto wantInts = [[65],[123]]; |
|     size_t row; |
|     foreach (record; csvReader!int(str, ["b"])) |
|         assert(equal(record, wantInts[row++])); |
| |
|     // With exceptions enabled, asking for columns out of input order is rejected. |
|     bool rejected; |
|     try |
|     { |
|         csvReader(str, ["c","b"]); |
|     } |
|     catch (HeaderMismatchException e) |
|     { |
|         rejected = true; |
|         assert(e.col == 2); |
|     } |
|     assert(rejected); |
| |
|     // With Malformed.ignore the requested order is not enforced: the fields |
|     // come back in the order they appear in the input. |
|     auto records2 = csvReader!(string,Malformed.ignore) |
|         (str, ["b","a"], ',', '"'); |
|     auto wantStrings = [["Hello","65"],["World","123"]]; |
|     row = 0; |
|     foreach (record; records2) |
|         assert(equal(record, wantStrings[row++])); |
| |
|     // Requested columns that are missing from the input are skipped. |
|     str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4"; |
|     records2 = csvReader!(string,Malformed.ignore) |
|         (str, ["a","b","c","d"], ',', '"'); |
|     wantStrings = [["Joe","Carpenter"],["Fred","Fly"]]; |
|     row = 0; |
|     foreach (record; records2) |
|         assert(equal(record, wantStrings[row++])); |
| } |
| |
| // Test null header interface |
| @safe unittest |
| { |
|     string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562"; |
| |
|     // Passing null says "the input starts with a header, iterate every |
|     // column"; the header row is still exposed through `header`. |
|     auto allColumns = csvReader(str, null); |
|     assert(allColumns.header == ["a","b","c"]); |
| |
|     // Selecting a subset of columns does not shorten the reported header. |
|     auto records = csvReader(str, ["a"]); |
|     assert(records.header == ["a","b","c"]); |
| } |
| |
| // Test unchecked read |
| @safe pure unittest |
| { |
|     // A quote inside an unquoted field is kept as data when errors are ignored. |
|     string single = "one \"quoted\""; |
|     foreach (record; csvReader!(string,Malformed.ignore)(single)) |
|         foreach (cell; record) |
|             assert(cell == "one \"quoted\""); |
| |
|     // The same holds per field when the record is converted into a struct. |
|     struct Ans |
|     { |
|         string a,b; |
|     } |
|     string pair = "one \"quoted\",two \"quoted\" end"; |
|     foreach (record; csvReader!(Ans,Malformed.ignore)(pair)) |
|     { |
|         assert(record.a == "one \"quoted\""); |
|         assert(record.b == "two \"quoted\" end"); |
|     } |
| } |
| |
| // Test partial data returned |
| @safe pure unittest |
| { |
|     // The opening quote is never closed, so reading must fail and report |
|     // the text consumed up to the end of input. |
|     string str = "\"one\nnew line"; |
| |
|     bool caught; |
|     try |
|     { |
|         foreach (record; csvReader(str)) |
|         {} |
|     } |
|     catch (IncompleteCellException ice) |
|     { |
|         caught = true; |
|         assert(ice.partialData == "one\nnew line"); |
|     } |
|     assert(caught); |
| } |
| |
| // Test Windows line break |
| @safe pure unittest |
| { |
|     // "\r\n" must be treated as a single record separator. |
|     auto records = csvReader("one,two\r\nthree"); |
| |
|     auto first = records.front; |
|     assert(first.front == "one"); |
|     first.popFront(); |
|     assert(first.front == "two"); |
| |
|     records.popFront(); |
|     auto second = records.front; |
|     assert(second.front == "three"); |
| } |
| |
| |
| // Test associative array support with unicode separator |
| @safe unittest |
| { |
|     // The first line is the header; the two data rows are identical. |
|     string str = "1❁2❁3\n34❁65❁63\n34❁65❁63"; |
| |
|     size_t seen; |
|     foreach (record; csvReader!(string[string])(str,["3","1"],'❁')) |
|     { |
|         assert(record["1"] == "34"); |
|         assert(record["3"] == "63"); |
|         ++seen; |
|     } |
|     assert(seen == 2); |
| } |
| |
| // Test restricted range |
| @safe unittest |
| { |
|     import std.typecons; |
| |
|     // Wrapper that exposes only empty/front/popFront over a dstring, so the |
|     // reader cannot rely on slicing, length or save. |
|     struct InputRange |
|     { |
|         dstring text; |
| |
|         this(dstring txt) |
|         { |
|             text = txt; |
|         } |
| |
|         @property auto empty() |
|         { |
|             return text.empty; |
|         } |
| |
|         @property dchar front() |
|         { |
|             return text[0]; |
|         } |
| |
|         void popFront() |
|         { |
|             text.popFront(); |
|         } |
|     } |
| |
|     auto source = InputRange("Name,Occupation,Salary\r"d~ |
|         "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d); |
| |
|     // A null string[] header: the input has a header row, keep every column. |
|     string[] allColumns = null; |
| |
|     // Each call receives its own copy of `source`, so every loop starts |
|     // from the beginning of the text. |
|     foreach (record; csvReader(source, allColumns)) |
|         foreach (cell; record) {} |
|     foreach (record; csvReader!(Tuple!(string, string, int)) |
|         (source, allColumns)) {} |
|     foreach (record; csvReader!(string[string]) |
|         (source, allColumns)) {} |
| } |
| |
| @safe unittest // const/immutable dchars |
| { |
|     import std.algorithm.iteration : map; |
|     import std.array : array; |
| |
|     // Qualified element types must be accepted by the dchar constraint. |
|     const(dchar)[] constText = "foo,bar\n"; |
|     auto constRows = csvReader(constText).map!array.array; |
|     assert(constRows == [["foo", "bar"]]); |
| |
|     immutable(dchar)[] immutableText = "foo,bar\n"; |
|     auto immutableRows = csvReader(immutableText).map!array.array; |
|     assert(immutableRows == [["foo", "bar"]]); |
| } |
| |
| /* |
| * Parsing state shared between a reader and the record ranges it hands out. |
| * CsvReader allocates it with `new` and keeps only a pointer, so copies of |
| * the ranges all advance the same underlying input. |
| */ |
| private pure struct Input(Range, Malformed ErrorLevel) |
| { |
|     Range range; |
|     size_t row; |
|     size_t col; |
| |
|     // The expected row length is only tracked when length mismatches are reported. |
|     static if (ErrorLevel == Malformed.throwException) |
|     { |
|         size_t rowLength; |
|     } |
| } |
| |
| /* |
| * Range for iterating CSV records. |
| * |
| * This range is returned by the $(LREF csvReader) functions. It can be |
| * created in a similar manner to allow $(D ErrorLevel) be set to $(LREF |
| * Malformed).ignore if best guess processing should take place. |
| */ |
| // Input range over the records of a CSV input. `front` yields a filled-in |
| // Contents value for struct, class and associative-array Contents, and a |
| // CsvRecord range of fields for every other Contents type. |
| private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header) |
| if (isSomeChar!Separator && isInputRange!Range |
| && is(Unqual!(ElementType!Range) == dchar) |
| && isForwardRange!Header && isSomeString!(ElementType!Header)) |
| { |
| private: |
| // Heap-allocated state (see the constructors) shared with every CsvRecord |
| // created by this reader. |
| Input!(Range, ErrorLevel)* _input; |
| Separator _separator; |
| Separator _quote; |
| // Input column positions selected through the header constructor; stays |
| // empty when the reader is built without a header. |
| size_t[] indices; |
| bool _empty; |
| // The record range always produces strings for struct/class Contents (the |
| // per-member conversion happens in prime()), the value type for associative |
| // arrays, and Contents itself otherwise. |
| static if (is(Contents == struct) || is(Contents == class)) |
| { |
| Contents recordContent; |
| CsvRecord!(string, ErrorLevel, Range, Separator) recordRange; |
| } |
| else static if (is(Contents T : T[U], U : string)) |
| { |
| Contents recordContent; |
| CsvRecord!(T, ErrorLevel, Range, Separator) recordRange; |
| } |
| else |
| CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange; |
| public: |
| /** |
| * Header from the input in array form. |
| * |
| * ------- |
| * string str = "a,b,c\nHello,65,63.63"; |
| * auto records = csvReader(str, ["a"]); |
| * |
| * assert(records.header == ["a","b","c"]); |
| * ------- |
| */ |
| string[] header; |
| |
| /** |
| * Constructor to initialize the input, delimiter and quote for input |
| * without a header. |
| * |
| * ------- |
| * string str = `76;^26^;22`; |
| * int[] ans = [76,26,22]; |
| * auto records = CsvReader!(int,Malformed.ignore,string,char,string[]) |
| * (str, ';', '^'); |
| * |
| * foreach (record; records) |
| * { |
| * assert(equal(record, ans)); |
| * } |
| * ------- |
| */ |
| this(Range input, Separator delimiter, Separator quote) |
| { |
| _input = new Input!(Range, ErrorLevel)(input); |
| _separator = delimiter; |
| _quote = quote; |
| |
| // Load the first record immediately so that front is valid right away. |
| prime(); |
| } |
| |
| /** |
| * Constructor to initialize the input, delimiter and quote for input |
| * with a header. |
| * |
| * ------- |
| * string str = `high;mean;low\n76;^26^;22`; |
| * auto records = CsvReader!(int,Malformed.ignore,string,char,string[]) |
| * (str, ["high","low"], ';', '^'); |
| * |
| * int[] ans = [76,22]; |
| * foreach (record; records) |
| * { |
| * assert(equal(record, ans)); |
| * } |
| * ------- |
| * |
| * Throws: |
| * $(LREF HeaderMismatchException) when a header is provided but a |
| * matching column is not found or the order did not match that found |
| * in the input (non-struct). |
| */ |
| this(Range input, Header colHeaders, Separator delimiter, Separator quote) |
| { |
| _input = new Input!(Range, ErrorLevel)(input); |
| _separator = delimiter; |
| _quote = quote; |
| |
| // Map every requested column name to its position in the input; |
| // size_t.max marks a name that has not been seen in the header row. |
| size_t[string] colToIndex; |
| foreach (h; colHeaders) |
| { |
| colToIndex[h] = size_t.max; |
| } |
| |
| // Read the header row as plain strings (indices is still empty here). |
| auto r = CsvRecord!(string, ErrorLevel, Range, Separator) |
| (_input, _separator, _quote, indices); |
| |
| size_t colIndex; |
| foreach (col; r) |
| { |
| header ~= col; |
| auto ptr = col in colToIndex; |
| if (ptr) |
| *ptr = colIndex; |
| colIndex++; |
| } |
| // The above loop empties the header row. |
| recordRange._empty = true; |
| |
| // Collect the positions in the order the caller listed the names. |
| // NOTE(review): indices is sized from the number of distinct names while |
| // the loop runs once per listed name - confirm that duplicate names in |
| // colHeaders cannot reach this point. |
| indices.length = colToIndex.length; |
| int i; |
| foreach (h; colHeaders) |
| { |
| immutable index = colToIndex[h]; |
| static if (ErrorLevel != Malformed.ignore) |
| if (index == size_t.max) |
| throw new HeaderMismatchException |
| ("Header not found: " ~ to!string(h)); |
| indices[i++] = index; |
| } |
| |
| // Struct and class Contents look columns up per member in prime(), so the |
| // requested order is kept. Every other Contents type needs ascending |
| // positions: they are sorted for associative arrays and for |
| // Malformed.ignore, and must already be sorted otherwise. |
| static if (!is(Contents == struct) && !is(Contents == class)) |
| { |
| static if (is(Contents T : T[U], U : string)) |
| { |
| import std.algorithm.sorting : sort; |
| sort(indices); |
| } |
| else static if (ErrorLevel == Malformed.ignore) |
| { |
| import std.algorithm.sorting : sort; |
| sort(indices); |
| } |
| else |
| { |
| import std.algorithm.searching : findAdjacent; |
| import std.algorithm.sorting : isSorted; |
| if (!isSorted(indices)) |
| { |
| auto ex = new HeaderMismatchException |
| ("Header in input does not match specified header."); |
| // NOTE(review): the result of findAdjacent is discarded, so col |
| // below is taken from the first requested index - confirm this |
| // is the intended column to report. |
| findAdjacent!"a > b"(indices); |
| ex.row = 1; |
| ex.col = indices.front; |
| |
| throw ex; |
| } |
| } |
| } |
| |
| // Move past the header row and load the first data record. |
| popFront(); |
| } |
| |
| /** |
| * Part of an input range as defined by |
| * $(REF isInputRange, std,range,primitives). |
| * |
| * Returns: |
| * If $(D Contents) is a struct, will be filled with record data. |
| * |
| * If $(D Contents) is a class, will be filled with record data. |
| * |
| * If $(D Contents) is an associative array, will be filled |
| * with record data. |
| * |
| * If $(D Contents) is non-struct, a $(LREF CsvRecord) will be |
| * returned. |
| */ |
| @property auto front() |
| { |
| assert(!empty); |
| static if (is(Contents == struct) || is(Contents == class)) |
| { |
| return recordContent; |
| } |
| else static if (is(Contents T : T[U], U : string)) |
| { |
| return recordContent; |
| } |
| else |
| { |
| return recordRange; |
| } |
| } |
| |
| /** |
| * Part of an input range as defined by |
| * $(REF isInputRange, std,range,primitives). |
| */ |
| @property bool empty() @safe @nogc pure nothrow const |
| { |
| return _empty; |
| } |
| |
| /** |
| * Part of an input range as defined by |
| * $(REF isInputRange, std,range,primitives). |
| * |
| * Throws: |
| * $(LREF CSVException) When a quote is found in an unquoted field, |
| * data continues after a closing quote, the quoted field was not |
| * closed before data was empty, a conversion failed, or when the |
| * row's length does not match the previous length. |
| */ |
| void popFront() |
| { |
| // Consume whatever the caller left unread in the current record. |
| while (!recordRange.empty) |
| { |
| recordRange.popFront(); |
| } |
| |
| // The first completed row fixes the length later rows are compared with. |
| static if (ErrorLevel == Malformed.throwException) |
| if (_input.rowLength == 0) |
| _input.rowLength = _input.col; |
| |
| _input.col = 0; |
| |
| // Skip exactly one record separator: "\r", "\r\n" or "\n". |
| if (!_input.range.empty) |
| { |
| if (_input.range.front == '\r') |
| { |
| _input.range.popFront(); |
| if (!_input.range.empty && _input.range.front == '\n') |
| _input.range.popFront(); |
| } |
| else if (_input.range.front == '\n') |
| _input.range.popFront(); |
| } |
| |
| // Nothing after the separator means the previous record was the last one. |
| if (_input.range.empty) |
| { |
| _empty = true; |
| return; |
| } |
| |
| prime(); |
| } |
| |
| // Starts the next record: bumps the row counter, creates a fresh record |
| // range and, for struct/class/associative-array Contents, converts the |
| // whole record into recordContent. |
| private void prime() |
| { |
| if (_empty) |
| return; |
| _input.row++; |
| // Struct and class Contents read every column (null indices) and pick |
| // the wanted ones per member below; other types let the record range |
| // skip to the selected columns. |
| static if (is(Contents == struct) || is(Contents == class)) |
| { |
| recordRange = typeof(recordRange) |
| (_input, _separator, _quote, null); |
| } |
| else |
| { |
| recordRange = typeof(recordRange) |
| (_input, _separator, _quote, indices); |
| } |
| |
| static if (is(Contents T : T[U], U : string)) |
| { |
| T[U] aa; |
| try |
| { |
| for (; !recordRange.empty; recordRange.popFront()) |
| { |
| // presumably _input.col is advanced by CsvRecord as each field is |
| // read, making col-1 the position of the current field - TODO |
| // confirm against CsvRecord. |
| aa[header[_input.col-1]] = recordRange.front; |
| } |
| } |
| catch (ConvException e) |
| { |
| throw new CSVException(e.msg, _input.row, _input.col, e); |
| } |
| |
| recordContent = aa; |
| } |
| else static if (is(Contents == struct) || is(Contents == class)) |
| { |
| // Start every record from a fresh object so that members missing from a |
| // short row do not keep values from the previous record. |
| static if (is(Contents == class)) |
| recordContent = new typeof(recordContent)(); |
| else |
| recordContent = typeof(recordContent).init; |
| size_t colIndex; |
| try |
| { |
| for (; !recordRange.empty;) |
| { |
| auto colData = recordRange.front; |
| // Counted on every exit path, including a conversion failure. |
| scope(exit) colIndex++; |
| if (indices.length > 0) |
| { |
| // Header given: member ti is fed from input column indices[ti]. |
| foreach (ti, ToType; Fields!(Contents)) |
| { |
| if (indices[ti] == colIndex) |
| { |
| static if (!isSomeString!ToType) skipWS(colData); |
| recordContent.tupleof[ti] = to!ToType(colData); |
| } |
| } |
| } |
| else |
| { |
| // No header: member ti is fed from input column ti. |
| foreach (ti, ToType; Fields!(Contents)) |
| { |
| if (ti == colIndex) |
| { |
| static if (!isSomeString!ToType) skipWS(colData); |
| recordContent.tupleof[ti] = to!ToType(colData); |
| } |
| } |
| } |
| recordRange.popFront(); |
| } |
| } |
| catch (ConvException e) |
| { |
| throw new CSVException(e.msg, _input.row, colIndex, e); |
| } |
| } |
| } |
| } |
| |
| @safe pure unittest |
| { |
|     import std.algorithm.comparison : equal; |
| |
|     // Build the reader type directly: ';' separates fields and '^' quotes them. |
|     alias Reader = CsvReader!(int,Malformed.ignore,string,char,string[]); |
|     auto rows = Reader(`76;^26^;22`, ';', '^'); |
| |
|     foreach (row; rows) |
|         assert(row.equal([76, 26, 22])); |
| } |
| |
| // Bugzilla 15545 |
| // Regression test: input whose last record is terminated by a lone "\r" must |
| // be iterable to the end without anything being thrown. |
| // @system due to the catch for Throwable |
| @system pure unittest |
| { |
| import std.exception : assertNotThrown; |
| // Two-line literal; the final record ends with a carriage return only. |
| enum failData = |
| "name, surname, age |
| Joe, Joker, 99\r"; |
| auto r = csvReader(failData); |
| // Drain every record; reaching the end is the whole assertion. |
| assertNotThrown((){foreach (entry; r){}}()); |
| } |
| |
| /* |
| * This input range is accessible through $(LREF CsvReader) when the |
| * requested $(D Contents) type is neither a structure nor an associative array. |
| */ |
| private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator) |
| if (!is(Contents == class) && !is(Contents == struct)) |
| { |
|     import std.array : appender; |
| private: |
|     // Shared parser state. This struct reads and advances its `range`, |
|     // increments its `col`, and reads its `row` and `rowLength`. |
|     Input!(Range, ErrorLevel)* _input; |
|     Separator _separator; |
|     Separator _quote; |
|     // Most recently converted field; this is what front returns. |
|     Contents curContentsoken; |
|     // Raw characters of the field currently being read. |
|     typeof(appender!(dchar[])()) _front; |
|     bool _empty; |
|     // Per-field skip counts derived from the requested column indices. |
|     size_t[] _popCount; |
| public: |
|     /* |
|      * Params: |
|      *      input = Pointer to a character input range |
|      *      delimiter = Separator for each column |
|      *      quote = Character used for quotation |
|      *      indices = An array containing which columns will be returned. |
|      *             If empty, all columns are returned. List must be in order. |
|      */ |
|     this(Input!(Range, ErrorLevel)* input, Separator delimiter, |
|          Separator quote, size_t[] indices) |
|     { |
|         _input = input; |
|         _separator = delimiter; |
|         _quote = quote; |
|         _front = appender!(dchar[])(); |
|         // Work on a private copy; the loop below rewrites the entries. |
|         _popCount = indices.dup; |
| |
|         // If a header was given, each call to popFront will need |
|         // to eliminate so many tokens. This converts the absolute column |
|         // indices into the number of tokens to skip to reach the next |
|         // requested column. |
|         size_t normalizer; |
|         foreach (ref c; _popCount) |
|         { |
|             static if (ErrorLevel == Malformed.ignore) |
|             { |
|                 // If we are not throwing exceptions |
|                 // a header may not exist, indices are sorted |
|                 // and will be size_t.max if not found. |
|                 if (c == size_t.max) |
|                     break; |
|             } |
|             c -= normalizer; |
|             normalizer += c + 1; |
|         } |
| |
|         prime(); |
|     } |
| |
|     /** |
|      * Part of an input range as defined by |
|      * $(REF isInputRange, std,range,primitives). |
|      * |
|      * Returns: The field converted by the most recent read. |
|      */ |
|     @property Contents front() @safe pure |
|     { |
|         assert(!empty); |
|         return curContentsoken; |
|     } |
| |
|     /** |
|      * Part of an input range as defined by |
|      * $(REF isInputRange, std,range,primitives). |
|      */ |
|     @property bool empty() @safe pure nothrow @nogc const |
|     { |
|         return _empty; |
|     } |
| |
|     /* |
|      * CsvRecord is complete when input |
|      * is empty or starts with record break |
|      */ |
|     private bool recordEnd() |
|     { |
|         if (_input.range.empty |
|             || _input.range.front == '\n' |
|             || _input.range.front == '\r') |
|         { |
|             return true; |
|         } |
|         return false; |
|     } |
| |
| |
|     /** |
|      * Part of an input range as defined by |
|      * $(REF isInputRange, std,range,primitives). |
|      * |
|      * Throws: |
|      *       $(LREF CSVException) When a quote is found in an unquoted field, |
|      *       data continues after a closing quote, the quoted field was not |
|      *       closed before data was empty, a conversion failed, or when the |
|      *       row's length does not match the previous length. |
|      */ |
|     void popFront() |
|     { |
|         static if (ErrorLevel == Malformed.throwException) |
|             import std.format : format; |
|         // Skip last of record when header is depleted. |
|         if (_popCount.ptr && _popCount.empty) |
|             while (!recordEnd()) |
|             { |
|                 prime(1); |
|             } |
| |
|         if (recordEnd()) |
|         { |
|             _empty = true; |
|             // A finished record must have exactly the established length. |
|             static if (ErrorLevel == Malformed.throwException) |
|                 if (_input.rowLength != 0) |
|                     if (_input.col != _input.rowLength) |
|                         throw new CSVException( |
|                            format("Row %s's length %s does not match "~ |
|                                   "previous length of %s.", _input.row, |
|                                   _input.col, _input.rowLength)); |
|             return; |
|         } |
|         else |
|         { |
|             // Mid-record it is only an error to have run past that length. |
|             static if (ErrorLevel == Malformed.throwException) |
|                 if (_input.rowLength != 0) |
|                     if (_input.col > _input.rowLength) |
|                         throw new CSVException( |
|                            format("Row %s's length %s does not match "~ |
|                                   "previous length of %s.", _input.row, |
|                                   _input.col, _input.rowLength)); |
|         } |
| |
|         // Separator is left on the end of input from the last call. |
|         // This cannot be moved to after the call to csvNextToken as |
|         // there may be an empty record after it. |
|         // The range is known to be non-empty here: recordEnd() was false. |
|         if (_input.range.front == _separator) |
|             _input.range.popFront(); |
| |
|         _front.shrinkTo(0); |
| |
|         prime(); |
|     } |
| |
|     /* |
|      * Reads skipNum further tokens, leaving the last one read in _front. |
|      * Each step counts a column, clears the buffer, drops a leading |
|      * separator if one is present and parses one token. |
|      */ |
|     private void prime(size_t skipNum) |
|     { |
|         foreach (i; 0 .. skipNum) |
|         { |
|             _input.col++; |
|             _front.shrinkTo(0); |
|             // The range can already be exhausted here, e.g. when a requested |
|             // column lies beyond the end of a final record that has no |
|             // trailing record break. Only inspect the front when one exists; |
|             // csvNextToken then returns an empty token for exhausted input. |
|             if (!_input.range.empty && _input.range.front == _separator) |
|                 _input.range.popFront(); |
| |
|             try |
|                 csvNextToken!(Range, ErrorLevel, Separator) |
|                                    (_input.range, _front, _separator, _quote,false); |
|             catch (IncompleteCellException ice) |
|             { |
|                 // Attach the position and the partial field before rethrowing. |
|                 ice.row = _input.row; |
|                 ice.col = _input.col; |
|                 ice.partialData = _front.data.idup; |
|                 throw ice; |
|             } |
|             catch (ConvException e) |
|             { |
|                 throw new CSVException(e.msg, _input.row, _input.col, e); |
|             } |
|         } |
|     } |
| |
|     /* |
|      * Reads the next requested field: parses one token, skips ahead by the |
|      * next entry of _popCount (if any) and converts the buffered characters |
|      * to Contents, storing the result for front. |
|      */ |
|     private void prime() |
|     { |
|         try |
|         { |
|             _input.col++; |
|             csvNextToken!(Range, ErrorLevel, Separator) |
|                 (_input.range, _front, _separator, _quote,false); |
|         } |
|         catch (IncompleteCellException ice) |
|         { |
|             // Attach the position and the partial field before rethrowing. |
|             ice.row = _input.row; |
|             ice.col = _input.col; |
|             ice.partialData = _front.data.idup; |
|             throw ice; |
|         } |
| |
|         auto skipNum = _popCount.empty ? 0 : _popCount.front; |
|         if (!_popCount.empty) |
|             _popCount.popFront(); |
| |
|         // size_t.max marks a column that was not found: consume the rest of |
|         // the record and report this record as finished. |
|         if (skipNum == size_t.max) |
|         { |
|             while (!recordEnd()) |
|                 prime(1); |
|             _empty = true; |
|             return; |
|         } |
| |
|         if (skipNum) |
|             prime(skipNum); |
| |
|         auto data = _front.data; |
|         // Non-string targets have the buffer passed through skipWS first. |
|         static if (!isSomeString!Contents) skipWS(data); |
|         try curContentsoken = to!Contents(data); |
|         catch (ConvException e) |
|         { |
|             throw new CSVException(e.msg, _input.row, _input.col, e); |
|         } |
|     } |
| } |
| |
| /** |
|  * Low-level building block for parsing CSV: reads exactly one field. |
|  * |
|  * The field is consumed from the front of $(D input) and written to |
|  * $(D ans). When the function returns, the input is either exhausted or |
|  * begins with a delimiter or a record break (\n, \r\n, \r); the caller must |
|  * remove that character before asking for the next field. |
|  * |
|  * Params: |
|  *       input = Any CSV input |
|  *       ans   = Output range that receives the first field in the input |
|  *       sep   = The character to represent a comma in the specification |
|  *       quote = The character to represent a quote in the specification |
|  *       startQuoted = Whether the input should be considered to already be in |
|  *                     quotes |
|  * |
|  * Throws: |
|  *       $(LREF IncompleteCellException) When a quote is found in an unquoted |
|  *       field, data continues after a closing quote, or the quoted field was |
|  *       not closed before data was empty. |
|  */ |
| void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException, |
|                   Separator, Output) |
|                  (ref Range input, ref Output ans, |
|                   Separator sep, Separator quote, |
|                   bool startQuoted = false) |
| if (isSomeChar!Separator && isInputRange!Range |
|     && is(Unqual!(ElementType!Range) == dchar) |
|     && isOutputRange!(Output, dchar)) |
| { |
|     // Exhausted input or an immediate record break: nothing to read. |
|     if (input.empty || input.front == '\n' || input.front == '\r') |
|         return; |
| |
|     // The two flags are never set at the same time: |
|     //   insideQuotes - currently within a quoted section |
|     //   pendingQuote - a quote was just seen inside a quoted section; the |
|     //                  next character decides whether it closed the section |
|     //                  or was the first half of a doubled (escaped) quote |
|     bool insideQuotes = startQuoted; |
|     bool pendingQuote = false; |
| |
|     // A leading quote opens a quoted section and is not part of the field. |
|     if (input.front == quote) |
|     { |
|         insideQuotes = true; |
|         input.popFront(); |
|     } |
| |
|     for (; !input.empty; input.popFront()) |
|     { |
|         assert(!(insideQuotes && pendingQuote)); |
| |
|         // Outside quotes the field stops at a separator or record break, |
|         // which stays in the input for the caller to remove. |
|         if (!insideQuotes && (input.front == sep |
|                 || input.front == '\r' || input.front == '\n')) |
|             break; |
| |
|         if (input.front == quote) |
|         { |
|             if (pendingQuote) |
|             { |
|                 // Second quote of a doubled pair: emit one literal quote |
|                 // and continue inside the quoted section. |
|                 pendingQuote = false; |
|                 insideQuotes = true; |
|                 ans.put(quote); |
|             } |
|             else if (insideQuotes) |
|             { |
|                 // Either the closing quote or the first half of a doubled |
|                 // pair; remember it and let the next character decide. |
|                 pendingQuote = true; |
|                 insideQuotes = false; |
|             } |
|             else |
|             { |
|                 // Quote inside a field that is not quoted. |
|                 static if (ErrorLevel == Malformed.throwException) |
|                     throw new IncompleteCellException( |
|                         "Quote located in unquoted token"); |
|                 else static if (ErrorLevel == Malformed.ignore) |
|                     ans.put(quote); |
|             } |
|         } |
|         else |
|         { |
|             if (pendingQuote) |
|             { |
|                 // The quoted section was closed but the field carries on. |
|                 static if (ErrorLevel == Malformed.throwException) |
|                     throw new IncompleteCellException( |
|                         "Content continues after end quote, " ~ |
|                         "or needs to be escaped."); |
|                 else static if (ErrorLevel == Malformed.ignore) |
|                     break; |
|             } |
|             ans.put(input.front); |
|         } |
|     } |
| |
|     // Still inside quotes at the end of the data or of the line. |
|     static if (ErrorLevel == Malformed.throwException) |
|         if (insideQuotes |
|             && (input.empty || input.front == '\n' || input.front == '\r')) |
|             throw new IncompleteCellException( |
|                 "Data continues on future lines or trailing quote"); |
| } |
| |
| /// |
| @safe unittest |
| { |
|     import std.array : appender; |
|     import std.range.primitives : popFront; |
| |
|     string input = "65,63\n123,3673"; |
|     auto field = appender!(char[])(); |
| |
|     // First field of the first record; the separator stays in the input. |
|     csvNextToken(input, field, ',', '"'); |
|     assert(field.data == "65"); |
|     assert(input == ",63\n123,3673"); |
| |
|     // Drop the separator and clear the buffer before reading on. |
|     input.popFront(); |
|     field.shrinkTo(0); |
|     csvNextToken(input, field, ',', '"'); |
|     assert(field.data == "63"); |
|     assert(input == "\n123,3673"); |
| |
|     // Drop the record break; the next field belongs to the second record. |
|     input.popFront(); |
|     field.shrinkTo(0); |
|     csvNextToken(input, field, ',', '"'); |
|     assert(field.data == "123"); |
|     assert(input == ",3673"); |
| } |
| |
| // Walk csvNextToken across well-formed, unquoted input one field at a time. |
| @safe pure unittest |
| { |
|     import std.array; |
| |
|     // Field expected from one call and the input left behind afterwards. |
|     static struct Step { dstring field; string remainder; } |
| |
|     string input = "\U00010143Hello,65,63.63\nWorld,123,3673.562"; |
|     auto field = appender!(dchar[])(); |
| |
|     // The first call names the Range template argument explicitly. |
|     csvNextToken!string(input, field, ',', '"'); |
|     assert(field.data == "\U00010143Hello"); |
|     assert(input == ",65,63.63\nWorld,123,3673.562"); |
| |
|     const Step[] steps = [ |
|         Step("65", ",63.63\nWorld,123,3673.562"), |
|         Step("63.63", "\nWorld,123,3673.562"), |
|         Step("World", ",123,3673.562"), |
|         Step("123", ",3673.562"), |
|         Step("3673.562", ""), |
|     ]; |
| |
|     foreach (step; steps) |
|     { |
|         // Remove the separator or record break left by the previous call. |
|         input.popFront(); |
|         field.shrinkTo(0); |
|         csvNextToken(input, field, ',', '"'); |
|         assert(field.data == step.field); |
|         assert(input == step.remainder); |
|     } |
| } |
| |
| // Quoted fields: doubled quotes, an empty quoted field and an embedded |
| // new line. |
| @safe pure unittest |
| { |
|     import std.array; |
| |
|     // Field expected from one call and the input left behind afterwards. |
|     static struct Step { dstring field; string remainder; } |
| |
|     string input = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"; |
|     auto field = appender!(dchar[])(); |
| |
|     // The first call names the Range template argument explicitly. |
|     csvNextToken!string(input, field, ',', '"'); |
|     assert(field.data == "one"); |
|     assert(input == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"); |
| |
|     const Step[] steps = [ |
|         Step("two", `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix"), |
|         Step("three \"quoted\"", `,"",` ~ "\"five\nnew line\"\nsix"), |
|         Step("", ",\"five\nnew line\"\nsix"), |
|         Step("five\nnew line", "\nsix"), |
|         Step("six", ""), |
|     ]; |
| |
|     foreach (step; steps) |
|     { |
|         // Remove the separator or record break left by the previous call. |
|         input.popFront(); |
|         field.shrinkTo(0); |
|         csvNextToken(input, field, ',', '"'); |
|         assert(field.data == step.field); |
|         assert(input == step.remainder); |
|     } |
| } |
| |
| // A read that starts on a separator yields an empty field. |
| @safe pure unittest |
| { |
|     import std.array; |
| |
|     string input = "one,"; |
|     auto field = appender!(dchar[])(); |
| |
|     csvNextToken(input, field, ',', '"'); |
|     assert(field.data == "one"); |
|     assert(input == ","); |
| |
|     // The separator is deliberately left at the front: the next read stops |
|     // on it immediately and produces nothing. |
|     field.shrinkTo(0); |
|     csvNextToken(input, field, ',', '"'); |
|     assert(field.data == ""); |
| } |
| |
| // Malformed input: what is thrown, and how much was consumed or written |
| // by the time the error surfaces. |
| @safe pure unittest |
| { |
|     import std.array; |
| |
|     // A quoted field that is never closed: all of the input is consumed |
|     // and written out before the exception is raised. |
|     string input = "\"one\nnew line"; |
|     auto field = appender!(dchar[])(); |
|     try |
|     { |
|         csvNextToken(input, field, ',', '"'); |
|         assert(0); |
|     } |
|     catch (IncompleteCellException) |
|     { |
|         assert(field.data == "one\nnew line"); |
|         assert(input == ""); |
|     } |
| |
|     // A quote inside an unquoted field: parsing stops on the quote. |
|     input = "Hello world\""; |
|     field = appender!(dchar[])(); |
|     try |
|     { |
|         csvNextToken(input, field, ',', '"'); |
|         assert(0); |
|     } |
|     catch (IncompleteCellException) |
|     { |
|         assert(field.data == "Hello world"); |
|         assert(input == "\""); |
|     } |
| |
|     // With Malformed.ignore the stray quotes are copied through instead. |
|     input = "one, two \"quoted\" end"; |
|     field = appender!(dchar[])(); |
|     csvNextToken!(string,Malformed.ignore)(input, field, ',', '"'); |
|     assert(field.data == "one"); |
| |
|     input.popFront(); |
|     field.shrinkTo(0); |
|     csvNextToken!(string,Malformed.ignore)(input, field, ',', '"'); |
|     assert(field.data == " two \"quoted\" end"); |
| } |
| |
| // Custom separator ('|') and quote ('/') characters. |
| @safe pure unittest |
| { |
|     import std.array; |
| |
|     // Field expected from one call and the input left behind afterwards. |
|     static struct Step { dstring field; string remainder; } |
| |
|     string input = `one|two|/three "quoted"/|//`; |
|     auto field = appender!(dchar[])(); |
| |
|     csvNextToken(input, field, '|', '/'); |
|     assert(field.data == "one"d); |
|     assert(input == `|two|/three "quoted"/|//`); |
| |
|     const Step[] steps = [ |
|         Step("two"d, `|/three "quoted"/|//`), |
|         Step(`three "quoted"`d, `|//`), |
|     ]; |
| |
|     foreach (step; steps) |
|     { |
|         // Remove the separator left by the previous call. |
|         input.popFront(); |
|         field.shrinkTo(0); |
|         csvNextToken(input, field, '|', '/'); |
|         assert(field.data == step.field); |
|         assert(input == step.remainder); |
|     } |
| |
|     // The closing field is an empty quoted field; only its content is checked. |
|     input.popFront(); |
|     field.shrinkTo(0); |
|     csvNextToken(input, field, '|', '/'); |
|     assert(field.data == ""d); |
| } |
| |
| // Bugzilla 8908: numeric fields written with leading whitespace must still convert, both when a record is read into a struct and when it is read field by field. |
| @safe pure unittest |
| { |
| string csv = ` 1.0, 2.0, 3.0 |
| 4.0, 5.0, 6.0`; |
| |
| static struct Data { real a, b, c; } |
| size_t i = 0; /* index of the record currently being checked */ |
| foreach (data; csvReader!Data(csv)) with (data) /* first pass: members a, b and c of each record are read directly */ |
| { |
| int[] row = [cast(int) a, cast(int) b, cast(int) c]; /* truncate to int so the rows can be compared exactly */ |
| if (i == 0) |
| assert(row == [1, 2, 3]); |
| else |
| assert(row == [4, 5, 6]); |
| ++i; |
| } |
| |
| i = 0; /* restart the record count for the second pass */ |
| foreach (data; csvReader!real(csv)) /* second pass: each record is walked as a range of real fields */ |
| { |
| auto a = data.front; data.popFront(); |
| auto b = data.front; data.popFront(); |
| auto c = data.front; /* third field; the record is not advanced any further */ |
| int[] row = [cast(int) a, cast(int) b, cast(int) c]; |
| if (i == 0) |
| assert(row == [1, 2, 3]); |
| else |
| assert(row == [4, 5, 6]); |
| ++i; |
| } |
| } |