Qore CsvUtil Module Reference 1.10
Loading...
Searching...
No Matches
AbstractCsvIterator.qc.dox.h
1// -*- mode: c++; indent-tabs-mode: nil -*-
2// Qore AbstractCsvIterator class definition
3
4/* AbstractCsvIterator.qc Copyright 2012 - 2023 Qore Technologies, s.r.o.
5
6 Permission is hereby granted, free of charge, to any person obtaining a
7 copy of this software and associated documentation files (the "Software"),
8 to deal in the Software without restriction, including without limitation
9 the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 and/or sell copies of the Software, and to permit persons to whom the
11 Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 DEALINGS IN THE SOFTWARE.
23*/
24
25// assume local var scope, do not use "$" for vars, members, and method calls
26
28namespace CsvUtil {
30
// AbstractCsvIterator: abstract base class that allows abstract CSV data to be iterated.
// It supports a single-type mode (options only) and a multi-type mode (record spec plus options);
// see the two constructors below.
285class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
286
287public:
288protected:
    // valid options for the object (a hash for quick lookups of valid keys);
    // each value is a mask of C_OPT1 / C_OPT2 flags (the flags are declared outside this listing)
290 const Options = {
291 "compat_force_empty_string": C_OPT1|C_OPT2,
292 "date_format": C_OPT1|C_OPT2,
293 "date-format": C_OPT1|C_OPT2,
294 "encoding": C_OPT1|C_OPT2,
295 "eol": C_OPT1|C_OPT2,
296 "extended_record": C_OPT2,
297 "fields": C_OPT1,
298 "header-lines": C_OPT1|C_OPT2,
299 "header_lines": C_OPT1|C_OPT2,
300 "header-names": C_OPT1|C_OPT2,
301 "header_names": C_OPT1|C_OPT2,
302 "header_reorder": C_OPT1|C_OPT2,
303 "headers": C_OPT1,
304 "ignore-empty": C_OPT1|C_OPT2,
305 "ignore_empty": C_OPT1|C_OPT2,
306 "ignore-whitespace": C_OPT1|C_OPT2,
307 "ignore_whitespace": C_OPT1|C_OPT2,
308 "number_format": C_OPT1|C_OPT2,
309 "quote": C_OPT1|C_OPT2,
310 "separator": C_OPT1|C_OPT2,
311 "timezone": C_OPT1|C_OPT2,
312 "tolwr": C_OPT1|C_OPT2,
313 "verify-columns": C_OPT1|C_OPT2,
314 "verify_columns": C_OPT1|C_OPT2,
315 };
316
317 // field separator
318 string separator = ",";
319
320 // field content delimiter
321 string quote = "\"";
322
323 // number of header lines
324 softint headerLines = 0;
325
326 // flag to use string names from the first header row if possible
327 bool headerNames = False;
328
329 // True if empty lines should be ignored
330 bool ignoreEmptyLines = True;
331
332 // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
333 bool ignoreWhitespace = True;
334
335 // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
336 *TimeZone timezone;
337
338 // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
339 bool checkElementCounts = False;
340
341 // getRecord/getValue returns extended hash
342 bool extendedRecord = False;
343
344 // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
345 bool compat_force_empty_string = False;
346
347 // read ahead flag
348 bool read_ahead;
349
350 // column count for verifying column counts
351 int cc;
352
353 // current record count for the index() method
354 int rc = 0;
355
356 // to resolve record type by rules
357 hash<string, hash<string, list<hash<auto>>>> m_resolve_by_rule;
358
359 // to resolve record type by number of fields
360 hash<string, list<string>> m_resolve_by_count;
361
362 // list of idx to field transformations, in order of spec
363 hash<string, list<string>> m_resolve_by_idx;
364
365 // fake specs based on the first non-header row
366 bool fakeHeaderNames;
367
    // the eol marker, if any
369 *string eol;
370
371 // data source iterator
372 AbstractLineIterator lineIterator;
373
374public:
375
377
    // creates the AbstractCsvIterator with an option hash in single-type mode
384 constructor(AbstractLineIterator li, *hash<auto> opts);
385
386
388
    // creates the AbstractCsvIterator with an option hash in multi-type mode
393 // NOTE: when declared as *hash then always calls this constructor
394 constructor(AbstractLineIterator li, hash<auto> spec, hash<auto> opts);
395
396
398protected:
    // processes common options and assigns internal fields
399 processCommonOptions(*hash<auto> opts, int C_OPTx);
400public:
401
402
404protected:
    // processes the specification and assigns internal data for resolving
405 processSpec(hash<auto> spec);
406public:
407
408
410protected:
    // matches headers provided in the CSV header or in options
411 prepareFieldsFromHeaders(*list<auto> headers);
412public:
413
414
415 bool valid();
416
417
419
    // moves the current line / record position to the next line / record
424 bool next();
425
426
428
431
432
434
    // returns the given column value for the current row
441 auto memberGate(string name);
442
443
445
    // returns the current record as a hash
456 hash<auto> getValue();
457
458
460
    // returns the current record as a hash
473 hash<auto> getRecord(bool extended);
474
475
477
    // returns the current record as a hash
488 hash<auto> getRecord();
489
490
492
505
506
508
    // returns the current separator string
515 string getSeparator();
516
517
519
    // returns the current quote string
526 string getQuote();
527
528
    // returns the description of the record type, if any
530 *hash<string, AbstractDataField> getRecordType();
531
532
534
    // returns the current record headers or NOTHING if no headers have been detected or saved yet
541 *list<string> getHeaders();
542
543
545
    // returns a list of headers for the given record or NOTHING if the record is not recognized
550 *list<string> getHeaders(string type);
551
552
554
    // returns the row index being iterated, which does not necessarily correspond to the line number
565 int index();
566
567
569
583
584
586
    // returns the current line 'as it is', i.e. the original string
595 string getRawLine();
596
597
599
    // returns the list of raw string values of the current line
609 list<*string> getRawLineValues();
610
611
612protected:
613 auto handleType(hash<auto> fh, *string val);
614public:
615
616
618protected:
    // reads a line and splits it by separator/quote into a list
619 list<*string> getLineAndSplit();
620public:
621
622
624
    // identifies the line type using identifyTypeImpl(); may be overridden if necessary
631 string identifyType(list<auto> rec);
632
633
635
642protected:
    // identifies an input record; returns no value when the record type cannot be determined (NOTE(review): inferred from the *string return type - confirm)
643 *string identifyTypeImpl(list<auto> rec);
644public:
645
646
648protected:
    // parses a line in the file and returns the processed fields
649 hash<auto> parseLine();
650public:
651
652 }; // AbstractCsvIterator class
653}; // CsvUtil namespace
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition: AbstractCsvIterator.qc.dox.h:285
prepareFieldsFromHeaders(*list< auto > headers)
match headers provided in the CSV header or in options; never called for multi-type because header_names ...
hash< auto > getRecord(bool extended)
Returns the current record as a hash.
string getQuote()
Returns the current quote string.
processSpec(hash< auto > spec)
process the specification and assign internal data for resolving
*string eol
the eol marker, if any
Definition: AbstractCsvIterator.qc.dox.h:369
auto memberGate(string name)
Returns the given column value for the current row.
constructor(AbstractLineIterator li, *hash< auto > opts)
creates the AbstractCsvIterator with an option hash in single-type mode
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:290
peek()
Reads a single row without moving the index position.
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
hash< auto > getRecord()
Returns the current record as a hash.
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
hash< auto > getValue()
Returns the current record as a hash.
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
processCommonOptions(*hash< auto > opts, int C_OPTx)
process common options and assign internal fields
constructor(AbstractLineIterator li, hash< auto > spec, hash< auto > opts)
creates the AbstractCsvIterator with an option hash in multi-type mode
*list< string > getHeaders(string type)
Returns a list of headers for the given record or NOTHING if the record is not recognized.
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
auto getRecordList()
Returns the current record as a list.
string getSeparator()
Returns the current separator string.
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition: AbstractCsvIterator.qc.dox.h:28