Qore Programming Language  1.7.0
StreamReader.h
1 /* -*- mode: c++; indent-tabs-mode: nil -*- */
2 /*
3  StreamReader.h
4 
5  Qore Programming Language
6 
7  Copyright (C) 2016 - 2022 Qore Technologies, s.r.o.
8 
9  Permission is hereby granted, free of charge, to any person obtaining a
10  copy of this software and associated documentation files (the "Software"),
11  to deal in the Software without restriction, including without limitation
12  the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  and/or sell copies of the Software, and to permit persons to whom the
14  Software is furnished to do so, subject to the following conditions:
15 
16  The above copyright notice and this permission notice shall be included in
17  all copies or substantial portions of the Software.
18 
19  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25  DEALINGS IN THE SOFTWARE.
26 
27  Note that the Qore library is released under a choice of three open-source
28  licenses: MIT (as above), LGPL 2+, or GPL 2+; see README-LICENSE for more
29  information.
30 */
31 
32 #ifndef _QORE_STREAMREADER_H
33 #define _QORE_STREAMREADER_H
34 
35 #include <cstdint>
36 
37 #include "qore/qore_bitopts.h"
38 #include "qore/InputStream.h"
39 #include "qore/intern/StringReaderHelper.h"
40 
// Class ID and QoreClass pointer that are only declared here (extern); their definitions are not in this header.
// NOTE(review): judging by the names these belong to the StreamReader class - confirm where they are defined.
41 DLLLOCAL extern qore_classid_t CID_STREAMREADER;
42 DLLLOCAL extern QoreClass* QC_STREAMREADER;
43 
46 public:
47  DLLLOCAL StreamReader(ExceptionSink* xsink, InputStream* is, const QoreEncoding* encoding = QCS_DEFAULT) :
48  in(is, xsink),
49  enc(encoding) {
50  }
51 
52  virtual DLLLOCAL ~StreamReader() {
53  }
54 
55  DLLLOCAL const QoreEncoding* getEncoding() const {
56  return enc;
57  }
58 
59  DLLLOCAL const InputStream* getInputStream() const {
60  return *in;
61  }
62 
63  DLLLOCAL void setEncoding(const QoreEncoding* n_enc) {
64  enc = n_enc;
65  }
66 
68 
//! Read binary data from the stream.
/** Data is read in chunks of at most STREAMREADER_BUFFER_SIZE bytes through readData() with
 *  require_all = false, and every chunk is appended to `b`.
 *
 *  @param limit 0 returns immediately without reading; -1 keeps reading until readData() reports 0 bytes;
 *         a positive value reads at most that many bytes; any other negative value enters neither loop,
 *         so nothing is read
 *  @param xsink exception sink; if it is set after a read, 0 is returned
 *
 *  @return the accumulated data, or 0 if no data was read or an exception was raised
 *
 *  NOTE(review): `b` is used below but its declaration is not present in this listing (embedded source
 *  line 76 is missing); it is released with b.release() on success - confirm its type against the real header.
 */
73  DLLLOCAL BinaryNode* readBinary(int64 limit, ExceptionSink* xsink) {
74  if (limit == 0)
75  return 0;
77  char buffer[STREAMREADER_BUFFER_SIZE];
78  if (limit == -1) {
// unlimited read: stop only on an exception or when the stream yields no more data
79  while (true) {
80  int rc = readData(xsink, buffer, STREAMREADER_BUFFER_SIZE, false);
81  if (*xsink)
82  return 0;
83  if (rc == 0)
84  break;
85  b->append(buffer, rc);
86  }
87  } else {
// bounded read: never request more than the remaining limit or the buffer size
88  while (limit > 0) {
89  int rc = readData(xsink, buffer, QORE_MIN(limit, STREAMREADER_BUFFER_SIZE), false);
90  if (*xsink)
91  return 0;
92  if (rc == 0)
93  break;
94  b->append(buffer, rc);
95  limit -= rc;
96  }
97  }
98 
// an empty result is reported as 0 rather than as an empty object
99  return b->empty() ? 0 : b.release();
100  }
101 
103 
108  DLLLOCAL QoreStringNode* readString(int64 size, ExceptionSink* xsink) {
109  return q_read_string(xsink, size, enc, std::bind(&StreamReader::readData, this, _3, _1, _2, false));
110  }
111 
113 
119  DLLLOCAL QoreStringNode* readLine(const QoreStringNode* eol, bool trim, ExceptionSink* xsink) {
120  if (!eol && !enc->isAsciiCompat()) {
121  QoreString nl("\n");
122  return readLineEol(&nl, trim, xsink);
123  }
124 
125  return eol ? readLineEol(eol, trim, xsink) : readLine(trim, xsink);
126  }
127 
//! Reads one line terminated by an explicit end-of-line string.
/** The stream is read one byte at a time; every byte is appended to `str` and compared against
 *  `eolstr` (the `eol` argument passed through TempEncodingHelper with the reader's encoding and with
 *  its BOM removed). `eolpos` counts how many leading bytes of `eolstr` currently match the tail of `str`.
 *
 *  @param eol the end-of-line string
 *  @param trim if true, the end-of-line bytes are cut off the returned string
 *  @param xsink exception sink; 0 is returned if it is set
 *
 *  @return the line passed through q_remove_bom_utf16(), or 0 at end of stream when nothing was read
 *
 *  NOTE(review): `str` is used below but its declaration is not present in this listing (embedded source
 *  line 134 is missing) - confirm its type against the real header.
 */
128  DLLLOCAL QoreStringNode* readLineEol(const QoreString* eol, bool trim, ExceptionSink* xsink) {
129  TempEncodingHelper eolstr(eol, enc, xsink);
130  if (*xsink)
131  return 0;
132  eolstr.removeBom();
133 
135 
// number of leading bytes of eolstr matched so far
136  size_t eolpos = 0;
137 
138  while (true) {
139  signed char c;
140  int64 rc = readData(xsink, &c, 1, false);
141  //printd(5, "StreamReader::readLineEol() eolpos: %d/%d rc: %d c: %d str: '%s' (%s)\n", eolpos, eolstr->size(), rc, c, str->c_str(), enc->getCode());
142  if (*xsink)
143  return 0;
// end of stream: return what was collected so far, or 0 if nothing was collected
144  if (!rc)
145  return str->empty() ? 0 : q_remove_bom_utf16(str.release(), enc);
146 
147  // add the char to the string
148  str->concat(c);
149 
150  if ((**eolstr)[eolpos] == c) {
151  ++eolpos;
// the complete end-of-line string has been seen
152  if (eolpos == eolstr->size()) {
153  if (trim)
154  str->terminate(str->size() - eolpos);
155  return q_remove_bom_utf16(str.release(), enc);
156  }
157  } else if (eolpos) {
// a partial match was broken by this byte: look for the longest tail of str (at most eolpos bytes)
// that is still a prefix of eolstr, and continue matching from there
158  // check all positions to see if the string matches
159  bool found = false;
160  for (size_t i = eolpos; i; --i) {
161  // we have to use memcmp here because we could be dealing with character
162  // encodings that include nulls in the string (ex: UTF-16*)
163  if (!memcmp(eolstr->c_str(), str->c_str() + str->size() - i, i)) {
164  found = true;
165  if (eolpos != i)
166  eolpos = i;
167  break;
168  }
169  }
170  if (!found)
171  eolpos = 0;
172  }
173  }
174  }
175 
//! Reads one line, treating "\n", "\r" and "\r\n" as line terminators.
/** The stream is read one byte at a time with readData() (require_all = false). After a '\r' the next
 *  byte is inspected with peek() and consumed only if it is '\n'.
 *
 *  @param trim if false, the terminator bytes are appended to the returned string; if true they are left out
 *  @param xsink exception sink; 0 is returned if it is set after a read or a peek
 *
 *  @return the line read, or 0 at end of stream when nothing was read
 *
 *  NOTE(review): `str` is used below but its declaration is not present in this listing (embedded source
 *  line 177 is missing) - confirm its type against the real header.
 */
176  DLLLOCAL QoreStringNode* readLine(bool trim, ExceptionSink* xsink) {
178 
179  while (true) {
180  signed char c;
181  int64 rc = readData(xsink, &c, 1, false);
182  if (*xsink)
183  return 0;
184  if (!rc) { // End of stream.
185  return str->empty() ? 0 : str.release();
186  }
187 
188  if (c == '\n') {
189  if (!trim)
190  str->concat(c);
191  return str.release();
192  } else if (c == '\r') {
193  if (!trim)
194  str->concat(c);
// look ahead without consuming to see whether this is a "\r\n" pair
195  int64 p = peek(xsink);
196  if (*xsink)
197  return 0;
198  if (p == '\n') {
// consume the '\n' that was just peeked; the return value of this read is not checked
199  readData(xsink, &c, 1);
200  if (!trim)
201  str->concat((char)p);
202  }
203  return str.release();
204  }
// ordinary byte: part of the line
205  str->concat(c);
206  }
207  }
208 
209  DLLLOCAL int64 readi1(ExceptionSink* xsink) {
210  signed char i = 0;
211  if (readData(xsink, &i, 1) < 0)
212  return 0;
213  return i;
214  }
215 
216  DLLLOCAL int64 readi2(ExceptionSink* xsink) {
217  signed short i = 0;
218  if (readData(xsink, &i, 2) < 0)
219  return 0;
220  i = ntohs(i);
221  return i;
222  }
223 
224  DLLLOCAL int64 readi4(ExceptionSink* xsink) {
225  int32_t i = 0;
226  if (readData(xsink, &i, 4) < 0)
227  return 0;
228  i = ntohl(i);
229  return i;
230  }
231 
232  DLLLOCAL int64 readi8(ExceptionSink* xsink) {
233  int64 i = 0;
234  if (readData(xsink, &i, 8) < 0)
235  return 0;
236  i = i8MSB(i);
237  return i;
238  }
239 
240  DLLLOCAL int64 readi2LSB(ExceptionSink* xsink) {
241  signed short i = 0;
242  if (readData(xsink, &i, 2) < 0)
243  return 0;
244  i = i2LSB(i);
245  return i;
246  }
247 
248  DLLLOCAL int64 readi4LSB(ExceptionSink* xsink) {
249  int32_t i = 0;
250  if (readData(xsink, &i, 4) < 0)
251  return 0;
252  i = i4LSB(i);
253  return i;
254  }
255 
256  DLLLOCAL int64 readi8LSB(ExceptionSink* xsink) {
257  int64 i = 0;
258  if (readData(xsink, &i, 8) < 0)
259  return 0;
260  i = i8LSB(i);
261  return i;
262  }
263 
264  DLLLOCAL int64 readu1(ExceptionSink* xsink) {
265  unsigned char i = 0;
266  if (readData(xsink, &i, 1) < 0)
267  return 0;
268  return i;
269  }
270 
271  DLLLOCAL int64 readu2(ExceptionSink* xsink) {
272  unsigned short i = 0;
273  if (readData(xsink, &i, 2) < 0)
274  return 0;
275  i = ntohs(i);
276  return i;
277  }
278 
279  DLLLOCAL int64 readu4(ExceptionSink* xsink) {
280  uint32_t i = 0;
281  if (readData(xsink, &i, 4) < 0)
282  return 0;
283  i = ntohl(i);
284  return i;
285  }
286 
287  DLLLOCAL int64 readu2LSB(ExceptionSink* xsink) {
288  unsigned short i = 0;
289  if (readData(xsink, &i, 2) < 0)
290  return 0;
291  i = i2LSB(i);
292  return i;
293  }
294 
295  DLLLOCAL int64 readu4LSB(ExceptionSink* xsink) {
296  uint32_t i = 0;
297  if (readData(xsink, &i, 4) < 0)
298  return 0;
299  i = i4LSB(i);
300  return i;
301  }
302 
// NOTE(review): the signature line of this method (embedded source line 308) is missing from this listing;
// the cross-reference index at the end of the listing gives it as `int64 peekCheck(ExceptionSink *xsink)`.
//
// Peeks the next byte with peek() and turns a -1 result that arrives without a pending exception into an
// END-OF-STREAM-ERROR exception. Returns the peeked value when it is non-negative, otherwise -1.
309  int64 rc = peek(xsink);
310  if (rc < 0) {
311  if (!*xsink) {
312  if (rc == -1) {
313  xsink->raiseException("END-OF-STREAM-ERROR", "there is not enough data available in the stream; 1 byte was requested, and 0 were read");
314  }
315  else {
// this branch is only entered when no exception is pending, so the assertion below fails whenever it is
// reached: a negative result other than -1 without an exception is treated as a programming error
316  assert(*xsink);
317  }
318  }
319  return -1;
320  }
321  return rc;
322  }
323 
325 
334  DLLLOCAL virtual qore_offset_t read(ExceptionSink* xsink, void* dest, size_t limit, bool require_all = true) {
335  return readData(xsink, dest, limit, require_all);
336  }
337 
338  DLLLOCAL virtual const char* getName() const { return "StreamReader"; }
339 
340 protected:
341  // default buffer size (note that I/O is generally unbuffered in this class)
// size in bytes of the temporary stack buffer that readBinary() fills on each readData() call
342  static const int STREAMREADER_BUFFER_SIZE = 4096;
343 
346 
349 
350 private:
352 
359  DLLLOCAL virtual qore_offset_t readData(ExceptionSink* xsink, void* dest, size_t limit, bool require_all = true) {
360  assert(dest);
361  assert(limit > 0);
362  char* destPtr = static_cast<char*>(dest);
363  size_t read = 0;
364  while (true) {
365  int64 rc = in->read(destPtr + read, limit - read, xsink);
366  if (*xsink)
367  return -1;
368  //printd(5, "StreamReader::readData() dest: %p limit: " QLLD " read: " QLLD " rc: " QLLD " char: %d\n", dest, limit, read, rc, destPtr[0]);
369  if (!rc) {
370  if (require_all) {
371  xsink->raiseException("END-OF-STREAM-ERROR", "there is not enough data available in the stream; " QSD " bytes were requested, and " QSD " were read", limit, read);
372  return -1;
373  }
374  break;
375  }
376  read += rc;
377  if (read == limit)
378  break;
379  }
380  return read;
381  }
382 
388  virtual int64 peek(ExceptionSink* xsink) {
389  return in->peek(xsink);
390  }
391 };
392 
393 #endif // _QORE_STREAMREADER_H
DLLEXPORT const QoreEncoding * QCS_DEFAULT
the default encoding for the Qore library
#define QORE_MIN(a, b)
macro to return the minimum of 2 numbers
Definition: QoreLib.h:547
the base class for all data to be used as private data of Qore objects
Definition: AbstractPrivateData.h:44
holds arbitrary binary data
Definition: BinaryNode.h:41
DLLEXPORT void append(const void *nptr, size_t size)
resizes the object and appends a copy of the data passed to the object
DLLEXPORT bool empty() const
returns true if empty
container for holding Qore-language exception information and also for registering a "thread_exit" ca...
Definition: ExceptionSink.h:48
DLLEXPORT AbstractQoreNode * raiseException(const char *err, const char *fmt,...)
appends a Qore-language exception to the list
Interface for private data of input streams.
Definition: InputStream.h:44
virtual int64 peek(ExceptionSink *xsink)=0
Peeks the next byte from the input stream.
virtual int64 read(void *ptr, int64 limit, ExceptionSink *xsink)=0
Reads up to `limit` bytes from the input stream.
defines a Qore-language class
Definition: QoreClass.h:239
defines string encoding functions in Qore
Definition: QoreEncoding.h:83
DLLEXPORT bool isAsciiCompat() const
returns true if the character encoding is backwards-compatible with ASCII
Qore's string type supported by the QoreEncoding class.
Definition: QoreString.h:93
Qore's string value type, reference counted, dynamically-allocated only.
Definition: QoreStringNode.h:50
Private data for the Qore::StreamReader class.
Definition: StreamReader.h:45
virtual DLLLOCAL qore_offset_t read(ExceptionSink *xsink, void *dest, size_t limit, bool require_all=true)
Read data until a limit.
Definition: StreamReader.h:334
DLLLOCAL BinaryNode * readBinary(int64 limit, ExceptionSink *xsink)
Read binary data from the stream.
Definition: StreamReader.h:73
const QoreEncoding * enc
Encoding of the source input stream.
Definition: StreamReader.h:348
DLLLOCAL QoreStringNode * readString(int64 size, ExceptionSink *xsink)
Read string data from the stream.
Definition: StreamReader.h:108
ReferenceHolder< InputStream > in
Source input stream.
Definition: StreamReader.h:345
DLLLOCAL QoreStringNode * readLine(const QoreStringNode *eol, bool trim, ExceptionSink *xsink)
Read one line.
Definition: StreamReader.h:119
int64 peekCheck(ExceptionSink *xsink)
Peeks the next byte from the input stream.
Definition: StreamReader.h:308
use this class to manage strings where the character encoding must be specified and may be different ...
Definition: QoreString.h:1104
unsigned qore_classid_t
used for the unique class ID for QoreClass objects
Definition: common.h:79
intptr_t qore_offset_t
used for offsets that could be negative
Definition: common.h:76
long long int64
64bit integer type, cannot use int64_t here since it breaks the API on some 64-bit systems due to equ...
Definition: common.h:260