Qore Programming Language  0.9.3.2
StreamReader.h
1 /* -*- mode: c++; indent-tabs-mode: nil -*- */
2 /*
3  StreamReader.h
4 
5  Qore Programming Language
6 
7  Copyright (C) 2016 - 2017 Qore Technologies, s.r.o.
8 
9  Permission is hereby granted, free of charge, to any person obtaining a
10  copy of this software and associated documentation files (the "Software"),
11  to deal in the Software without restriction, including without limitation
12  the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  and/or sell copies of the Software, and to permit persons to whom the
14  Software is furnished to do so, subject to the following conditions:
15 
16  The above copyright notice and this permission notice shall be included in
17  all copies or substantial portions of the Software.
18 
19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25  DEALINGS IN THE SOFTWARE.
26 
27  Note that the Qore library is released under a choice of three open-source
28  licenses: MIT (as above), LGPL 2+, or GPL 2+; see README-LICENSE for more
29  information.
30 */
31 
32 #ifndef _QORE_STREAMREADER_H
33 #define _QORE_STREAMREADER_H
34 
35 #include <cstdint>
36 
37 #include "qore/qore_bitopts.h"
38 #include "qore/InputStream.h"
39 #include "qore/intern/StringReaderHelper.h"
40 
41 DLLLOCAL extern qore_classid_t CID_STREAMREADER;
42 DLLLOCAL extern QoreClass* QC_STREAMREADER;
43 
46 public:
47  DLLLOCAL StreamReader(ExceptionSink* xsink, InputStream* is, const QoreEncoding* encoding = QCS_DEFAULT) :
48  in(is, xsink),
49  enc(encoding) {
50  }
51 
52  virtual DLLLOCAL ~StreamReader() {
53  }
54 
55  DLLLOCAL const QoreEncoding* getEncoding() const {
56  return enc;
57  }
58 
59  DLLLOCAL const InputStream* getInputStream() const {
60  return *in;
61  }
62 
63  DLLLOCAL void setEncoding(const QoreEncoding* n_enc) {
64  enc = n_enc;
65  }
66 
68 
// Reads binary data from the stream.
//  - limit == 0:  returns 0 immediately without reading
//  - limit == -1: reads until readData() returns 0 (no more data)
//  - otherwise:   the loop runs while limit > 0, reading at most `limit` bytes in total
// Data is read in chunks of at most STREAMREADER_BUFFER_SIZE bytes with require_all = false,
// so a short or empty read ends the loop instead of raising an exception.
// Returns 0 if an exception is pending in `xsink` or if nothing was appended; otherwise the
// result of b.release().
// NOTE(review): `b` is used below but its declaration is not present in this text (listing
// line 76 is missing); presumably it is a holder owning a new BinaryNode - confirm against
// the real header.
73  DLLLOCAL BinaryNode* readBinary(int64 limit, ExceptionSink* xsink) {
74  if (limit == 0)
75  return 0;
77  char buffer[STREAMREADER_BUFFER_SIZE];
78  if (limit == -1) {
// unbounded read: keep going until the stream yields no more data
79  while (true) {
80  int rc = readData(xsink, buffer, STREAMREADER_BUFFER_SIZE, false);
81  if (*xsink)
82  return 0;
83  if (rc == 0)
84  break;
85  b->append(buffer, rc);
86  }
87  }
88  else {
// bounded read: never request more than the remaining byte budget
89  while (limit > 0) {
90  int rc = readData(xsink, buffer, QORE_MIN(limit, STREAMREADER_BUFFER_SIZE), false);
91  if (*xsink)
92  return 0;
93  if (rc == 0)
94  break;
95  b->append(buffer, rc);
96  limit -= rc;
97  }
98  }
99 
// an empty result is reported as 0 rather than as an empty object
100  return b->empty() ? 0 : b.release();
101  }
102 
104 
109  DLLLOCAL QoreStringNode* readString(int64 size, ExceptionSink* xsink) {
110  return q_read_string(xsink, size, enc, std::bind(&StreamReader::readData, this, _3, _1, _2, false));
111  }
112 
114 
120  DLLLOCAL QoreStringNode* readLine(const QoreStringNode* eol, bool trim, ExceptionSink* xsink) {
121  if (!eol && !enc->isAsciiCompat()) {
122  QoreString nl("\n");
123  return readLineEol(&nl, trim, xsink);
124  }
125 
126  return eol ? readLineEol(eol, trim, xsink) : readLine(trim, xsink);
127  }
128 
// Reads one line terminated by the byte sequence of `eol`.
// `eol` is first wrapped in a TempEncodingHelper constructed with the reader's encoding `enc`,
// and any leading BOM is removed from it. The stream is then consumed one byte at a time
// (require_all = false) until the bytes read end with the marker or no more data is available.
//  - trim: when true, the marker is cut off the end of the returned string
// Returns 0 if an exception is pending in `xsink`, or if the stream ended before any byte was
// read; otherwise the string passed through q_remove_bom_utf16().
129  DLLLOCAL QoreStringNode* readLineEol(const QoreString* eol, bool trim, ExceptionSink* xsink) {
130  TempEncodingHelper eolstr(eol, enc, xsink);
131  if (*xsink)
132  return 0;
133  eolstr.removeBom();
134 
// NOTE(review): `str` is used below but its declaration is not present in this text (listing
// line 135 is missing); presumably it is a holder owning a new QoreStringNode - confirm
// against the real header.
136 
// number of leading marker bytes currently matched at the end of `str`
137  qore_size_t eolpos = 0;
138 
139  while (true) {
140  char c;
141  int64 rc = readData(xsink, &c, 1, false);
142  //printd(5, "StreamReader::readLineEol() eolpos: %d/%d rc: %d c: %d str: '%s' (%s)\n", eolpos, eolstr->size(), rc, c, str->c_str(), enc->getCode());
143  if (*xsink)
144  return 0;
// end of data: return what was collected so far, or 0 if nothing was read
145  if (!rc)
146  return str->empty() ? 0 : q_remove_bom_utf16(str.release(), enc);
147 
148  // add the char to the string
149  str->concat(c);
150 
151  if ((**eolstr)[eolpos] == c) {
152  ++eolpos;
// the whole marker has been matched: the line is complete
153  if (eolpos == eolstr->size()) {
154  if (trim)
155  str->terminate(str->size() - eolpos);
156  return q_remove_bom_utf16(str.release(), enc);
157  }
158  }
159  else if (eolpos) {
// partial match broken: look for the longest tail of `str` (at most eolpos bytes) that is
// still a prefix of the marker and continue matching from there
160  // check all positions to see if the string matches
161  bool found = false;
162  for (size_t i = eolpos; i; --i) {
163  // we have to use memcmp here because we could be dealing with character
164  // encodings that include nulls in the string (ex: UTF-16*)
165  if (!memcmp(eolstr->c_str(), str->c_str() + str->size() - i, i)) {
166  found = true;
167  if (eolpos != i)
168  eolpos = i;
169  break;
170  }
171  }
172  if (!found)
173  eolpos = 0;
174  }
175  }
176  }
177 
// Reads one line, recognising "\n", "\r" and "\r\n" as line endings.
// Bytes are read one at a time with require_all = false.
//  - trim: when true, the line-ending bytes are left out of the returned string
// Returns 0 if an exception is pending in `xsink` after a read or peek, or if the stream
// ended before any byte was collected; otherwise the collected string.
// NOTE(review): `str` is used below but its declaration is not present in this text (listing
// line 179 is missing); presumably it is a holder owning a new QoreStringNode - confirm
// against the real header.
178  DLLLOCAL QoreStringNode* readLine(bool trim, ExceptionSink* xsink) {
180 
181  while (true) {
182  char c;
183  int64 rc = readData(xsink, &c, 1, false);
184  if (*xsink)
185  return 0;
186  if (!rc) { // End of stream.
187  return str->empty() ? 0 : str.release();
188  }
189 
190  if (c == '\n') {
191  if (!trim)
192  str->concat(c);
193  return str.release();
194  }
195  else if (c == '\r') {
196  if (!trim)
197  str->concat(c);
// look ahead without consuming, so that a lone "\r" does not swallow the next line's first byte
198  int64 p = peek(xsink);
199  if (*xsink)
200  return 0;
201  if (p == '\n') {
// consume the "\n" that peek() reported
// NOTE(review): the result of this readData() call is not checked, and it runs with
// require_all = true - confirm that a failure here cannot leave an exception pending while
// a string is still returned
202  readData(xsink, &c, 1);
203  if (!trim)
204  str->concat((char)p);
205  }
206  return str.release();
207  }
208  str->concat(c);
209  }
210  }
211 
212  DLLLOCAL int64 readi1(ExceptionSink* xsink) {
213  char i = 0;
214  if (readData(xsink, &i, 1) < 0)
215  return 0;
216  return i;
217  }
218 
219  DLLLOCAL int64 readi2(ExceptionSink* xsink) {
220  short i = 0;
221  if (readData(xsink, &i, 2) < 0)
222  return 0;
223  i = ntohs(i);
224  return i;
225  }
226 
227  DLLLOCAL int64 readi4(ExceptionSink* xsink) {
228  int32_t i = 0;
229  if (readData(xsink, &i, 4) < 0)
230  return 0;
231  i = ntohl(i);
232  return i;
233  }
234 
235  DLLLOCAL int64 readi8(ExceptionSink* xsink) {
236  int64 i = 0;
237  if (readData(xsink, &i, 8) < 0)
238  return 0;
239  i = i8MSB(i);
240  return i;
241  }
242 
243  DLLLOCAL int64 readi2LSB(ExceptionSink* xsink) {
244  short i = 0;
245  if (readData(xsink, &i, 2) < 0)
246  return 0;
247  i = i2LSB(i);
248  return i;
249  }
250 
251  DLLLOCAL int64 readi4LSB(ExceptionSink* xsink) {
252  int32_t i = 0;
253  if (readData(xsink, &i, 4) < 0)
254  return 0;
255  i = i4LSB(i);
256  return i;
257  }
258 
259  DLLLOCAL int64 readi8LSB(ExceptionSink* xsink) {
260  int64 i = 0;
261  if (readData(xsink, &i, 8) < 0)
262  return 0;
263  i = i8LSB(i);
264  return i;
265  }
266 
267  DLLLOCAL int64 readu1(ExceptionSink* xsink) {
268  unsigned char i = 0;
269  if (readData(xsink, &i, 1) < 0)
270  return 0;
271  return i;
272  }
273 
274  DLLLOCAL int64 readu2(ExceptionSink* xsink) {
275  unsigned short i = 0;
276  if (readData(xsink, &i, 2) < 0)
277  return 0;
278  i = ntohs(i);
279  return i;
280  }
281 
282  DLLLOCAL int64 readu4(ExceptionSink* xsink) {
283  uint32_t i = 0;
284  if (readData(xsink, &i, 4) < 0)
285  return 0;
286  i = ntohl(i);
287  return i;
288  }
289 
290  DLLLOCAL int64 readu2LSB(ExceptionSink* xsink) {
291  unsigned short i = 0;
292  if (readData(xsink, &i, 2) < 0)
293  return 0;
294  i = i2LSB(i);
295  return i;
296  }
297 
298  DLLLOCAL int64 readu4LSB(ExceptionSink* xsink) {
299  uint32_t i = 0;
300  if (readData(xsink, &i, 4) < 0)
301  return 0;
302  i = i4LSB(i);
303  return i;
304  }
305 
// NOTE(review): the signature line of this method (listing line 311) is not present in this
// text; the index at the end of the listing names it as `int64 peekCheck(ExceptionSink *xsink)`.
// Peeks the next byte via peek(). A non-negative result is returned unchanged. Any negative
// result makes this method return -1; if peek() returned exactly -1 without an exception
// pending, an END-OF-STREAM-ERROR exception is raised first.
312  int64 rc = peek(xsink);
313  if (rc < 0) {
314  if (!*xsink) {
315  if (rc == -1) {
316  xsink->raiseException("END-OF-STREAM-ERROR", "there is not enough data available in the stream; 1 byte was requested, and 0 were read");
317  }
318  else {
// reached only with rc < -1 and no exception pending, where this assertion cannot hold:
// it flags that combination as unexpected in builds with assertions enabled
319  assert(*xsink);
320  }
321  }
322  return -1;
323  }
324  return rc;
325  }
326 
328 
337  DLLLOCAL virtual qore_offset_t read(ExceptionSink* xsink, void* dest, qore_size_t limit, bool require_all = true) {
338  return readData(xsink, dest, limit, require_all);
339  }
340 
341  DLLLOCAL virtual const char* getName() const { return "StreamReader"; }
342 
343 protected:
344  // default buffer size (note that I/O is generally unbuffered in this class)
345  static const int STREAMREADER_BUFFER_SIZE = 4096;
346 
349 
352 
353 private:
355 
362  DLLLOCAL virtual qore_offset_t readData(ExceptionSink* xsink, void* dest, qore_size_t limit, bool require_all = true) {
363  assert(dest);
364  assert(limit > 0);
365  char* destPtr = static_cast<char*>(dest);
366  qore_size_t read = 0;
367  while (true) {
368  int64 rc = in->read(destPtr + read, limit - read, xsink);
369  if (*xsink)
370  return -1;
371  //printd(5, "StreamReader::readData() dest: %p limit: " QLLD " read: " QLLD " rc: " QLLD " char: %d\n", dest, limit, read, rc, destPtr[0]);
372  if (!rc) {
373  if (require_all) {
374  xsink->raiseException("END-OF-STREAM-ERROR", "there is not enough data available in the stream; " QSD " bytes were requested, and " QSD " were read", limit, read);
375  return -1;
376  }
377  break;
378  }
379  read += rc;
380  if (read == limit)
381  break;
382  }
383  return read;
384  }
385 
391  virtual int64 peek(ExceptionSink* xsink) {
392  return in->peek(xsink);
393  }
394 };
395 
396 #endif // _QORE_STREAMREADER_H
DLLEXPORT const char * c_str() const
returns the string's buffer; this data should not be changed
defines string encoding functions in Qore
Definition: QoreEncoding.h:83
DLLEXPORT bool empty() const
returns true if the string is empty, false if not
DLLEXPORT bool isAsciiCompat() const
returns true if the character encoding is backwards-compatible with ASCII
the base class for all data to be used as private data of Qore objects
Definition: AbstractPrivateData.h:44
DLLEXPORT const QoreEncoding * QCS_DEFAULT
the default encoding for the Qore library
ReferenceHolder< InputStream > in
Source input stream.
Definition: StreamReader.h:348
DLLEXPORT bool empty() const
returns true if empty
virtual int64 read(void *ptr, int64 limit, ExceptionSink *xsink)=0
Reads up to `limit` bytes from the input stream.
size_t qore_size_t
used for sizes (same range as a pointer)
Definition: common.h:73
virtual DLLLOCAL qore_offset_t read(ExceptionSink *xsink, void *dest, qore_size_t limit, bool require_all=true)
Read data until a limit.
Definition: StreamReader.h:337
DLLEXPORT AbstractQoreNode * raiseException(const char *err, const char *fmt,...)
appends a Qore-language exception to the list
DLLLOCAL QoreStringNode * readString(int64 size, ExceptionSink *xsink)
Read string data from the stream.
Definition: StreamReader.h:109
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:81
Qore's string value type, reference counted, dynamically-allocated only.
Definition: QoreStringNode.h:50
DLLEXPORT void concat(const QoreString *str, ExceptionSink *xsink)
concatenates a string and converts encodings if necessary
defines a Qore-language class
Definition: QoreClass.h:237
use this class to manage strings where the character encoding must be specified and may be different ...
Definition: QoreString.h:1015
unsigned qore_classid_t
used for the unique class ID for QoreClass objects
Definition: common.h:79
container for holding Qore-language exception information and also for registering a "thread_exit" ca...
Definition: ExceptionSink.h:46
Interface for private data of input streams.
Definition: InputStream.h:44
long long int64
64bit integer type, cannot use int64_t here since it breaks the API on some 64-bit systems due to equ...
Definition: common.h:260
virtual int64 peek(ExceptionSink *xsink)=0
Peeks the next byte from the input stream.
DLLEXPORT void terminate(qore_size_t size)
terminates the string at byte position "size", the string is reallocated if necessary ...
DLLLOCAL QoreStringNode * readLine(const QoreStringNode *eol, bool trim, ExceptionSink *xsink)
Read one line.
Definition: StreamReader.h:120
intptr_t qore_offset_t
used for offsets that could be negative
Definition: common.h:76
#define QORE_MIN(a, b)
macro to return the minimum of 2 numbers
Definition: QoreLib.h:538
DLLLOCAL BinaryNode * readBinary(int64 limit, ExceptionSink *xsink)
Read binary data from the stream.
Definition: StreamReader.h:73
int64 peekCheck(ExceptionSink *xsink)
Peeks the next byte from the input stream.
Definition: StreamReader.h:311
DLLEXPORT void removeBom()
remove any leading byte order marker (BOM) from UTF-16* strings
DLLEXPORT qore_size_t size() const
returns the number of bytes in the string (not including the null terminator)
const QoreEncoding * enc
Encoding of the source input stream.
Definition: StreamReader.h:351
Private data for the Qore::StreamReader class.
Definition: StreamReader.h:45
DLLEXPORT void append(const void *nptr, qore_size_t size)
resizes the object and appends a copy of the data passed to the object
holds arbitrary binary data
Definition: BinaryNode.h:41