Qore CsvUtil Module Reference 1.11
Loading...
Searching...
No Matches
AbstractCsvIterator.qc.dox.h
1// -*- mode: c++; indent-tabs-mode: nil -*-
2// Qore AbstractCsvIterator class definition
3
4/* AbstractCsvIterator.qc Copyright 2012 - 2024 Qore Technologies, s.r.o.
5
6 Permission is hereby granted, free of charge, to any person obtaining a
7 copy of this software and associated documentation files (the "Software"),
8 to deal in the Software without restriction, including without limitation
9 the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 and/or sell copies of the Software, and to permit persons to whom the
11 Software is furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 DEALINGS IN THE SOFTWARE.
23*/
24
25// assume local var scope, do not use "$" for vars, members, and method calls
26
// the CsvUtil namespace; all classes used in the CsvUtil module should be inside this namespace
28namespace CsvUtil {
30
// abstract base class that allows abstract CSV data to be iterated
285class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
286
287public:
288protected:
// valid options for the object (a hash for quick lookups of valid keys)
290 const Options = ...;
291
292
293 // field separator
294 string separator = ",";
295
296 // field content delimiter
297 string quote = "\"";
298
299 // number of header lines
300 softint headerLines = 0;
301
302 // flag to use string names from the first header row if possible
303 bool headerNames = False;
304
305 // True if empty lines should be ignored
306 bool ignoreEmptyLines = True;
307
308 // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
309 bool ignoreWhitespace = True;
310
311 // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
312 *TimeZone timezone;
313
314 // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
315 bool checkElementCounts = False;
316
317 // getRecord/getValue returns extended hash
318 bool extendedRecord = False;
319
320 // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
321 bool compat_force_empty_string = False;
322
323 // read ahead flag
324 bool read_ahead;
325
326 // column count for verifying column counts
327 int cc;
328
329 // current record count for the index() method
330 int rc = 0;
331
332 // to resolve record type by rules
333 hash<string, hash<string, list<hash<auto>>>> m_resolve_by_rule;
334
335 // to resolve record type by number of fields
336 hash<string, list<string>> m_resolve_by_count;
337
338 // list of idx to field transformations, in order of spec
339 hash<string, list<string>> m_resolve_by_idx;
340
341 // fake specs based on the first non-header row
342 bool fakeHeaderNames;
343
// the eol marker, if any
345 *string eol;
346
347 // data source iterator
348 AbstractLineIterator lineIterator;
349
350public:
351
353
// creates the AbstractCsvIterator with an option hash in single-type mode
360 constructor(AbstractLineIterator li, *hash<auto> opts);
361
362
364
// creates the AbstractCsvIterator with an option hash in multi-type mode
369 // NOTE: when declared as *hash then always calls this constructor
370 constructor(AbstractLineIterator li, hash<auto> spec, hash<auto> opts);
371
372
// returns configuration information about the object
374 hash<auto> getInfo();
375
376
// processes common options and assigns internal fields
378protected:
379 processCommonOptions(*hash<auto> opts, int C_OPTx);
380public:
381
382
// processes the record specification and assigns internal data for resolving
384protected:
385 processSpec(hash<auto> spec);
386public:
387
388
// matches headers provided in the CSV header or in options; never called in multi-type mode
390protected:
391 prepareFieldsFromHeaders(*list<auto> headers);
392public:
393
394
// NOTE(review): no description is visible in this listing; presumably reports whether
// the iterator currently points at a valid record - confirm against Qore::AbstractIterator
395 bool valid();
396
397
399
// moves the current line / record position to the next line / record;
// returns False if there are no more lines / records to iterate
404 bool next();
405
406
408
411
412
414
// returns the given column value for the current row
421 auto memberGate(string name);
422
423
425
// returns the current record as a hash
436 hash<auto> getValue();
437
438
440
// returns the current record as a hash
// NOTE(review): "extended" presumably selects the extended hash format (see extendedRecord) - confirm
453 hash<auto> getRecord(bool extended);
454
455
457
// returns the current record as a hash
468 hash<auto> getRecord();
469
470
472
485
486
488
// returns the current separator string
495 string getSeparator();
496
497
499
// returns the current quote string
506 string getQuote();
507
508
// returns the description of the record type, if any
510 *hash<string, AbstractDataField> getRecordType();
511
512
514
// returns the current record headers or NOTHING if no headers have been detected or saved yet
521 *list<string> getHeaders();
522
523
525
// returns a list of headers for the given record or NOTHING if the record is not recognized
530 *list<string> getHeaders(string type);
531
532
534
// returns the row index being iterated, which does not necessarily correspond to the line number
545 int index();
546
547
549
563
564
566
// returns the current line 'as it is', i.e. the original string
575 string getRawLine();
576
577
579
// returns the list of raw string values of the current line
589 list<*string> getRawLineValues();
590
591
// NOTE(review): undocumented in this listing; takes a field description hash and an optional
// raw string value - presumably converts the value according to the field's type; confirm
592protected:
593 auto handleType(hash<auto> fh, *string val);
594public:
595
596
// reads a line and splits it by separator/quote into a list
598protected:
599 list<*string> getLineAndSplit();
600public:
601
602
604
// identifies the record type using identifyTypeImpl(); may be overridden if necessary
611 string identifyType(list<auto> rec);
612
613
615
// identifies the type of an input record by performing a lookup in precalculated data
622protected:
623 *string identifyTypeImpl(list<auto> rec);
624public:
625
626
// parses a line in the input and returns the processed fields (declared return type: hash<auto>)
628protected:
629 hash<auto> parseLine();
630public:
631
632 }; // AbstractCsvIterator class
633}; // CsvUtil namespace
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated
Definition AbstractCsvIterator.qc.dox.h:285
prepareFieldsFromHeaders(*list< auto > headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
hash< auto > getInfo()
Returns configuration information about the object.
hash< auto > getRecord(bool extended)
Returns the current record as a hash.
string getQuote()
Returns the current quote string.
processSpec(hash< auto > spec)
process specification and assign internal data for resolving
*string eol
the eol marker, if any
Definition AbstractCsvIterator.qc.dox.h:345
auto memberGate(string name)
Returns the given column value for the current row.
constructor(AbstractLineIterator li, *hash< auto > opts)
creates the AbstractCsvIterator with an option hash in single-type mode
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition AbstractCsvIterator.qc.dox.h:290
peek()
Reads a single row without moving the index position.
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
hash< auto > getRecord()
Returns the current record as a hash.
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
hash< auto > getValue()
Returns the current record as a hash.
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet.
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
processCommonOptions(*hash< auto > opts, int C_OPTx)
process common options and assign internal fields
constructor(AbstractLineIterator li, hash< auto > spec, hash< auto > opts)
creates the AbstractCsvIterator with an option hash in multi-type mode
*list< string > getHeaders(string type)
Returns a list of headers for the given record or NOTHING if the record is not recognized.
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
auto getRecordList()
Returns the current record as a list.
string getSeparator()
Returns the current separator string.
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace
Definition AbstractCsvIterator.qc.dox.h:28